6.1. List對象
List對象是“變長對象”。
6.1.1. Python中的建立
Python中List對象最重要的建立方法為PyList_New,如下Python語句最終會調用到PyList_New:
test = [1, 2, 3, 4, 5]
6.1.2. PyList_New的C調用棧
// pystate.c
PyInterpreterState_New
// ceval.c
=>_PyEval_EvalFrameDefault (case BUILD_LIST)
// listobject.c
=> PyList_New
6.1.3. PyList_New源碼
// listobject.c
PyObject *
PyList_New(Py_ssize_t size)
{
PyListObject *op;
#ifdef SHOW_ALLOC_COUNT
static int initialized = 0;
if (!initialized) {
Py_AtExit(show_alloc);
initialized = 1;
}
#endif
if (size < 0) {
PyErr_BadInternalCall();
return NULL;
}
if (numfree) {
numfree--;
op = free_list[numfree];
_Py_NewReference((PyObject *)op);
#ifdef SHOW_ALLOC_COUNT
count_reuse++;
#endif
} else {
op = PyObject_GC_New(PyListObject, &PyList_Type);
if (op == NULL)
return NULL;
#ifdef SHOW_ALLOC_COUNT
count_alloc++;
#endif
}
if (size <= 0)
op->ob_item = NULL;
else {
op->ob_item = (PyObject **) PyMem_Calloc(size, sizeof(PyObject *));
if (op->ob_item == NULL) {
Py_DECREF(op);
return PyErr_NoMemory();
}
}
Py_SIZE(op) = size;
op->allocated = size;
_PyObject_GC_TRACK(op);
return (PyObject *) op;
}
可以看到:
- List對象資料結構:
// listobject.h
typedef struct {
PyObject_VAR_HEAD
PyObject **ob_item;
Py_ssize_t allocated;
} PyListObject;
ob_size和allocated并不一樣,類似于STL中的vector的size和capacity。
- List對象緩沖池:隻緩沖PyListObject對象指針,裡面包含的ob_item全部被釋放。List對象緩沖區大小為80。
// listobject.c
#ifndef PyList_MAXFREELIST
#define PyList_MAXFREELIST 80
#endif
static PyListObject *free_list[PyList_MAXFREELIST];
static int numfree = 0;
static void
list_dealloc(PyListObject *op)
{
Py_ssize_t i;
PyObject_GC_UnTrack(op);
Py_TRASHCAN_SAFE_BEGIN(op)
if (op->ob_item != NULL) {
/* Do it backwards, for Christian Tismer.
There's a simple test case where somehow this reduces
thrashing when a *very* large list is created and
immediately deleted. */
i = Py_SIZE(op);
while (--i >= 0) {
Py_XDECREF(op->ob_item[i]);
}
PyMem_FREE(op->ob_item);
}
if (numfree < PyList_MAXFREELIST && PyList_CheckExact(op))
free_list[numfree++] = op;
else
Py_TYPE(op)->tp_free((PyObject *)op);
Py_TRASHCAN_SAFE_END(op)
}
6.2. List對象的維護
6.2.1. 設定元素
test = [0, 1, 2, 3, 4]
test[0] = 100
上面的Python代碼會調用C中的list_ass_subscript方法,由于是index case,是以最終會調用list_ass_item方法。需要注意,此處将v的引用計數加1,并且将之前的引用減1。
// listobject.c
static PySequenceMethods list_as_sequence = {
(lenfunc)list_length, /* sq_length */
(binaryfunc)list_concat, /* sq_concat */
(ssizeargfunc)list_repeat, /* sq_repeat */
(ssizeargfunc)list_item, /* sq_item */
0, /* sq_slice */
(ssizeobjargproc)list_ass_item, /* sq_ass_item */
0, /* sq_ass_slice */
(objobjproc)list_contains, /* sq_contains */
(binaryfunc)list_inplace_concat, /* sq_inplace_concat */
(ssizeargfunc)list_inplace_repeat, /* sq_inplace_repeat */
};
static int
list_ass_item(PyListObject *a, Py_ssize_t i, PyObject *v)
{
if (i < 0 || i >= Py_SIZE(a)) {
PyErr_SetString(PyExc_IndexError,
"list assignment index out of range");
return -1;
}
if (v == NULL)
return list_ass_slice(a, i, i+1, v);
Py_INCREF(v);
Py_SETREF(a->ob_item[i], v);
return 0;
}
Py_SETREF宏定義如下:
// object.h
#define Py_SETREF(op, op2) \
do { \
PyObject *_py_tmp = (PyObject *)(op); \
(op) = (op2); \
Py_DECREF(_py_tmp); \
} while (0)
6.2.2. 追加元素
test = []
test.append(1000)
上面的Python代碼會調用C中的list_append方法,最終會調用app1方法。
// listobject.h
#define LIST_APPEND_METHODDEF \
{"append", (PyCFunction)list_append, METH_O, list_append__doc__},
#define PyList_SET_ITEM(op, i, v) (((PyListObject *)(op))->ob_item[i] = (v))
#define PyList_GET_SIZE(op) (assert(PyList_Check(op)),Py_SIZE(op))
app1方法中擷取List對象裡的ob_item長度,調整ob_item大小,對傳入的v增加引用,并設定第n位為v。
// listobject.c
static PyMethodDef list_methods[] = {
// sth.
LIST_APPEND_METHODDEF
// sth.
};
static PyObject *
list_append(PyListObject *self, PyObject *object)
{
if (app1(self, object) == 0)
Py_RETURN_NONE;
return NULL;
}
static int
app1(PyListObject *self, PyObject *v)
{
Py_ssize_t n = PyList_GET_SIZE(self);
assert (v != NULL);
if (n == PY_SSIZE_T_MAX) {
PyErr_SetString(PyExc_OverflowError,
"cannot add more objects to list");
return -1;
}
if (list_resize(self, n+1) < 0)
return -1;
Py_INCREF(v);
PyList_SET_ITEM(self, n, v);
return 0;
}
6.2.3. 插入元素
test = [1, 2, 3, 4, 5]
test.insert(0, 10)
test.insert(-1, 100)
上面的Python代碼會調用C中的list_insert方法,最終會調用ins1方法。
// listobject.h
#define LIST_INSERT_METHODDEF \
{"insert", (PyCFunction)list_insert, METH_FASTCALL, list_insert__doc__},
static PyObject *
list_insert_impl(PyListObject *self, Py_ssize_t index, PyObject *object);
static PyObject *
list_insert(PyListObject *self, PyObject *const *args, Py_ssize_t nargs)
{
PyObject *return_value = NULL;
Py_ssize_t index;
PyObject *object;
if (!_PyArg_ParseStack(args, nargs, "nO:insert",
&index, &object)) {
goto exit;
}
return_value = list_insert_impl(self, index, object);
exit:
return return_value;
}
在ins1方法中調整ob_item大小,計算插入位置,對插入位置進行容錯處理,将插入位置之後的元素全部向後移一位,對傳入的v增加引用,并設定第where位為v。插入元素支援傳入負值。
// listobject.c
static PyMethodDef list_methods[] = {
// sth.
LIST_INSERT_METHODDEF
// sth.
};
static PyObject *
list_insert_impl(PyListObject *self, Py_ssize_t index, PyObject *object)
{
if (ins1(self, index, object) == 0)
Py_RETURN_NONE;
return NULL;
}
static int
ins1(PyListObject *self, Py_ssize_t where, PyObject *v)
{
Py_ssize_t i, n = Py_SIZE(self);
PyObject **items;
if (v == NULL) {
PyErr_BadInternalCall();
return -1;
}
if (n == PY_SSIZE_T_MAX) {
PyErr_SetString(PyExc_OverflowError,
"cannot add more objects to list");
return -1;
}
if (list_resize(self, n+1) < 0)
return -1;
if (where < 0) {
where += n;
if (where < 0)
where = 0;
}
if (where > n)
where = n;
items = self->ob_item;
for (i = n; --i >= where; )
items[i+1] = items[i];
Py_INCREF(v);
items[where] = v;
return 0;
}
6.2.4. 删除元素
test = [1, 2, 3, 4, 5]
test.remove(3)
上面的Python代碼會調用C中的list_remove方法。
// listobject.h
#define LIST_REMOVE_METHODDEF \
{"remove", (PyCFunction)list_remove, METH_O, list_remove__doc__},
list_remove方法周遊整個list,發現第一個比對上的對象,調用list_ass_slice方法進行删除。
// listobject.c
static PyMethodDef list_methods[] = {
// sth.
LIST_REMOVE_METHODDEF
// sth.
};
static PyObject *
list_remove(PyListObject *self, PyObject *value)
{
Py_ssize_t i;
for (i = 0; i < Py_SIZE(self); i++) {
int cmp = PyObject_RichCompareBool(self->ob_item[i], value, Py_EQ);
if (cmp > 0) {
if (list_ass_slice(self, i, i+1,
(PyObject *)NULL) == 0)
Py_RETURN_NONE;
return NULL;
}
else if (cmp < 0)
return NULL;
}
PyErr_SetString(PyExc_ValueError, "list.remove(x): x not in list");
return NULL;
}
list_ass_slice并不是專門用于删除的方法,根據方法的注釋可以看出,由于list_remove方法調用list_ass_slice,傳入NULL,是以相當于是删除。
a[ilow:ihigh] = v if v != NULL.
del a[ilow:ihigh] if v == NULL.
list_ass_slice的實作主要是用memmove來移動記憶體。值得注意的是此處使用recycle_on_stack節省對堆上記憶體的使用。
// listobject.c
static int
list_ass_slice(PyListObject *a, Py_ssize_t ilow, Py_ssize_t ihigh, PyObject *v)
{
PyObject *recycle_on_stack[8];
PyObject **recycle = recycle_on_stack;
PyObject **item;
PyObject **vitem = NULL;
PyObject *v_as_SF = NULL;
Py_ssize_t n;
Py_ssize_t norig;
Py_ssize_t d;
Py_ssize_t k;
size_t s;
int result = -1;
#define b ((PyListObject *)v)
if (v == NULL)
n = 0;
else {
// do sth.
}
if (ilow < 0)
ilow = 0;
else if (ilow > Py_SIZE(a))
ilow = Py_SIZE(a);
if (ihigh < ilow)
ihigh = ilow;
else if (ihigh > Py_SIZE(a))
ihigh = Py_SIZE(a);
norig = ihigh - ilow;
assert(norig >= 0);
d = n - norig;
if (Py_SIZE(a) + d == 0) {
Py_XDECREF(v_as_SF);
return _list_clear(a);
}
item = a->ob_item;
s = norig * sizeof(PyObject *);
if (s) {
if (s > sizeof(recycle_on_stack)) {
recycle = (PyObject **)PyMem_MALLOC(s);
if (recycle == NULL) {
PyErr_NoMemory();
goto Error;
}
}
memcpy(recycle, &item[ilow], s);
}
if (d < 0) { /* Delete -d items */
Py_ssize_t tail;
tail = (Py_SIZE(a) - ihigh) * sizeof(PyObject *);
memmove(&item[ihigh+d], &item[ihigh], tail);
if (list_resize(a, Py_SIZE(a) + d) < 0) {
memmove(&item[ihigh], &item[ihigh+d], tail);
memcpy(&item[ilow], recycle, s);
goto Error;
}
item = a->ob_item;
}
else if (d > 0) { /* Insert d items */
k = Py_SIZE(a);
if (list_resize(a, k+d) < 0)
goto Error;
item = a->ob_item;
memmove(&item[ihigh+d], &item[ihigh],
(k - ihigh)*sizeof(PyObject *));
}
for (k = 0; k < n; k++, ilow++) {
PyObject *w = vitem[k];
Py_XINCREF(w);
item[ilow] = w;
}
for (k = norig - 1; k >= 0; --k)
Py_XDECREF(recycle[k]);
result = 0;
Error:
if (recycle != recycle_on_stack)
PyMem_FREE(recycle);
Py_XDECREF(v_as_SF);
return result;
#undef b
}
6.2.5. 調整大小
無論是append、insert還是remove都會調用list_resize,重新調整list的allocated屬性。list_resize源碼如下:
// listobject.c
static int
list_resize(PyListObject *self, Py_ssize_t newsize)
{
PyObject **items;
size_t new_allocated, num_allocated_bytes;
Py_ssize_t allocated = self->allocated;
if (allocated >= newsize && newsize >= (allocated >> 1)) {
assert(self->ob_item != NULL || newsize == 0);
Py_SIZE(self) = newsize;
return 0;
}
new_allocated = (size_t)newsize + (newsize >> 3) + (newsize < 9 ? 3 : 6);
if (new_allocated > (size_t)PY_SSIZE_T_MAX / sizeof(PyObject *)) {
PyErr_NoMemory();
return -1;
}
if (newsize == 0)
new_allocated = 0;
num_allocated_bytes = new_allocated * sizeof(PyObject *);
items = (PyObject **)PyMem_Realloc(self->ob_item, num_allocated_bytes);
if (items == NULL) {
PyErr_NoMemory();
return -1;
}
self->ob_item = items;
Py_SIZE(self) = newsize;
self->allocated = new_allocated;
return 0;
}
可以看出:
- 如果new size小于等于allocated,且大于等于allocated/2,則不修改allocated,僅僅将list的size設定為new size;
-
否則重新計算allocated 值并realloc;
值得注意的是resize會擴大記憶體也會收縮記憶體。
6.3. List對象的特性
支援tp_as_sequence、tp_as_mapping兩種操作。
6.3.1. 序列操作
// listobject.c
&list_as_sequence, /* tp_as_sequence */
// listobject.c
static PySequenceMethods list_as_sequence = {
(lenfunc)list_length, /* sq_length */
(binaryfunc)list_concat, /* sq_concat */
(ssizeargfunc)list_repeat, /* sq_repeat */
(ssizeargfunc)list_item, /* sq_item */
0, /* sq_slice */
(ssizeobjargproc)list_ass_item, /* sq_ass_item */
0, /* sq_ass_slice */
(objobjproc)list_contains, /* sq_contains */
(binaryfunc)list_inplace_concat, /* sq_inplace_concat */
(ssizeargfunc)list_inplace_repeat, /* sq_inplace_repeat */
};
其中:
- list_length
len([1, 2, 3, 4, 5])
- list_concat
[0] + [1]
- list_repeat
[0]*10
- list_item:暫時沒有找到相應Python語句;
- list_ass_item
test = [0, 1, 2, 3, 4]
test[0] = 100
- list_contains
test = [0, 1, 2]
0 in test
- list_inplace_concat
test = [0, 1, 2]
test += [3]
- list_inplace_repeat
test = [0, 1, 2]
test *= 10
6.3.2. 關聯操作
// listobject.c
&list_as_mapping, /* tp_as_mapping */
// listobject.c
static PyMappingMethods list_as_mapping = {
(lenfunc)list_length,
(binaryfunc)list_subscript,
(objobjargproc)list_ass_subscript
};
- list_subscript
test = [0, 1, 2, 3, 4]
test[1]
test[0:3]
a[1]會走index case,a[0:3]會走slice case
- list_ass_subscript
test = [0, 1, 2, 3, 4]
test[0] = 100
test[1:3] = [1000]
test[0]會走list_ass_subscript方法的index分支,test[1:3]會走slice分支;
6.3.3. to string
// listobject.c
(reprfunc)list_repr, /* tp_repr */
0, /* tp_str */
6.3.4. hash
// listobject.c
PyObject_HashNotImplemented, /* tp_hash */
6.3.5. 比較
// listobject.c
list_richcompare, /* tp_richcompare */
6.3.6. 内置方法
// listobject.c
list_methods, /* tp_methods */
6.4 參考
- Python源碼剖析