最近在公司内部的问答系统上有同事问了一个问题:Python中的type()和__class__有什么差别?

>>> class Foo(object):
    pass

>>> class Bar(object):
    pass

>>> class Brion(object):
    pass

>>> class ASML(object):
    __class__ = Foo


    
>>> b = Bar()
>>> a = ASML()

Case 1

>>> b.__class__, type(b)
(<class '__main__.Bar'>, <class '__main__.Bar'>)
>>>
>>> b.__class__ = Foo
>>>
>>> b.__class__, type(b)
(<class '__main__.Foo'>, <class '__main__.Foo'>)

Case 2

>>> a.__class__, type(a)
(<class '__main__.Foo'>, <class '__main__.ASML'>)
>>>
>>> a.__class__ = Brion
>>>
>>> a.__class__, type(a)
(<class '__main__.Brion'>, <class '__main__.ASML'>)

大家看出Case 1和Case 2的差别了吧。问题来了:
1. type(obj)到底做了些什么事情?
2. 为什么a在改变__class__后,type(a)还是ASML呢?

为了解决这些问题,我们需要深入Python源代码。以下源代码来自Python 2.7.8

Python Object

Python中一切皆对象。所有对象的数据结构都以一个PyObject_HEAD开头。

object.h

typedef struct _object {
    PyObject_HEAD
} PyObject;

/* PyObject_HEAD defines the initial segment of every PyObject. */
#define PyObject_HEAD                   \
    _PyObject_HEAD_EXTRA                \
    Py_ssize_t ob_refcnt;               \
    struct _typeobject *ob_type;

这里的ob_refcnt是用来做引用计数的,而ob_type则是对象所对应的type对象。

type对象包含了很多关于对象的元信息:类型名字(tp_name),创建该类型对象时分配内存空间大小的信息(tp_basicsize和tp_itemsize),一些操作信息(tp_call, tp_new等),还有其他如__mro__(tp_mro), __bases__(tp_bases)等。

object.h

typedef struct _typeobject {
    PyObject_VAR_HEAD
    const char *tp_name; /* For printing, in format "<module>.<name>" */
    Py_ssize_t tp_basicsize, tp_itemsize; /* For allocation */

    ...

    /* More standard operations (here for binary compatibility) */
    ternaryfunc tp_call;
    ...

    /* Attribute descriptor and subclassing stuff */
    PyObject *tp_dict;
    newfunc tp_new;
    PyObject *tp_bases;
    PyObject *tp_mro; /* method resolution order */
    PyObject *tp_subclasses;
    ...

    ...

} PyTypeObject;

对于对象f = Foo()来说,它的ob_type就是Foo,而Foo的ob_type则是type。

type(obj)到底做了些什么事情

上面提到的每个对象都有的ob_type其实就是type(obj)返回的对象。

我们看下运行type(obj)时调用到的一系列函数。

abstract.c

PyObject *
PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw)
{
    ternaryfunc call;

    if ((call = func->ob_type->tp_call) != NULL) {
        PyObject *result;
        if (Py_EnterRecursiveCall(" while calling a Python object"))
            return NULL;
        result = (*call)(func, arg, kw);
        ...
        return result;
    }
    ...
    return NULL;
}

这里的func是obj的ob_type,以f = Foo()为例的话就是Foo。那func->ob_type当然就是type了。(注:严格来说,执行type(obj)时被调用的func是type本身而不是Foo;不过func->ob_type同样是type,所以接下来走到的仍然是type的tp_call。)

typeobject.c

PyTypeObject PyType_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "type",                                     /* tp_name */
    ...
    (ternaryfunc)type_call,                     /* tp_call */
    ...
    type_new,                                   /* tp_new */
    ...
};

从上面PyType_Type的定义可以看到type的tp_call就是type_call函数:

typeobject.c

static PyObject *
type_call(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    PyObject *obj;

    if (type->tp_new == NULL) {
        PyErr_Format(PyExc_TypeError,
                     "cannot create '%.100s' instances",
                     type->tp_name);
        return NULL;
    }

    obj = type->tp_new(type, args, kwds);
    if (obj != NULL) {
        /* Ugly exception: when the call was type(something),
           don't call tp_init on the result. */
        if (type == &PyType_Type &&
            PyTuple_Check(args) && PyTuple_GET_SIZE(args) == 1 &&
            (kwds == NULL ||
             (PyDict_Check(kwds) && PyDict_Size(kwds) == 0)))
            return obj;                         // Yun: type(obj) returns from here
        /* If the returned object is not an instance of type,
           it won't be initialized. */
        if (!PyType_IsSubtype(obj->ob_type, type))
            return obj;
        type = obj->ob_type;
        if (PyType_HasFeature(type, Py_TPFLAGS_HAVE_CLASS) &&
            type->tp_init != NULL &&
            type->tp_init(obj, args, kwds) < 0) {
            Py_DECREF(obj);
            obj = NULL;
        }
    }
    return obj;                              // Yun: type(cls, bases, dict) returns from here
}

type_call中先调用tp_new指向的函数(type_new),然后再做分支。对type(obj)调用来说就是直接返回tp_new得到的对象。而对type(cls, bases, dict)来说还会调用tp_init指向的函数,这在自定义metaclass时会用到。

typeobject.c

static PyObject *
type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds)
{
    ...

    /* Special case: type(x) should return x->ob_type */
    {
        const Py_ssize_t nargs = PyTuple_GET_SIZE(args);
        const Py_ssize_t nkwds = kwds == NULL ? 0 : PyDict_Size(kwds);

        if (PyType_CheckExact(metatype) && nargs == 1 && nkwds == 0) {
            PyObject *x = PyTuple_GET_ITEM(args, 0);
            Py_INCREF(Py_TYPE(x));
            return (PyObject *) Py_TYPE(x);
        }

        ...
    }

    ...
}

object.h

#define Py_TYPE(ob)             (((PyObject*)(ob))->ob_type)

可见type(obj)其实就是返回对象的ob_type

为什么a在改变__class__后,type(a)还是ASML

要回答这个问题,我们要先回顾下通过obj.xxx查找对象的 attribute 时的搜索顺序:
1. type对象及其基类的__dict__。如果是 data descriptor,返回这个 data descriptor的 __get__ 结果
2. obj的__dict__
3. 第一步中找到的如果是 non-data descriptor, 返回这个 non-data descriptor的 __get__ 结果
4. type对象中的__dict__,也就是直接返回第一步中找到的对象

obj.__class__就是一个 attribute 查找。

typeobject.c

static PyGetSetDef object_getsets[] = {
    {"__class__", object_get_class, object_set_class,
     PyDoc_STR("the object's class")},
    {0}
};

PyTypeObject PyBaseObject_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "object",                                   /* tp_name */
    ...
    PyObject_GenericGetAttr,                    /* tp_getattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    ...
    object_getsets,                             /* tp_getset */
    ...
};

来看下f = Foo(); f.__class__;中的函数调用链:

object.c

PyObject *
PyObject_GetAttr(PyObject *v, PyObject *name)
{
    PyTypeObject *tp = Py_TYPE(v);

    ...

    if (tp->tp_getattro != NULL)
        return (*tp->tp_getattro)(v, name);

    ...
    return NULL;
}

这里的v就是f, 而tp就是Foo, tp->tp_getattro就是PyObject_GenericGetAttr函数。

object.c

PyObject *
PyObject_GenericGetAttr(PyObject *obj, PyObject *name)
{
    return _PyObject_GenericGetAttrWithDict(obj, name, NULL);
}

PyObject *
_PyObject_GenericGetAttrWithDict(PyObject *obj, PyObject *name, PyObject *dict)
{
    PyTypeObject *tp = Py_TYPE(obj);
    PyObject *descr = NULL;
    PyObject *res = NULL;
    
    ...
    descr = _PyType_Lookup(tp, name);

    Py_XINCREF(descr);

    f = NULL;
    if (descr != NULL &&
        PyType_HasFeature(descr->ob_type, Py_TPFLAGS_HAVE_CLASS)) {
        f = descr->ob_type->tp_descr_get;
        if (f != NULL && PyDescr_IsData(descr)) {
            res = f(descr, obj, (PyObject *)obj->ob_type);
            Py_DECREF(descr);
            goto done;
        }
    }

    ...
    return res;
}

PyObject *
_PyType_Lookup(PyTypeObject *type, PyObject *name)
{
    Py_ssize_t i, n;
    PyObject *mro, *res, *base, *dict;
    unsigned int h;

    ...

    /* Look in tp_dict of types in MRO */
    mro = type->tp_mro;

    ...

    res = NULL;
    assert(PyTuple_Check(mro));
    n = PyTuple_GET_SIZE(mro);
    for (i = 0; i < n; i++) {
        base = PyTuple_GET_ITEM(mro, i);
        if (PyClass_Check(base))
            dict = ((PyClassObject *)base)->cl_dict;
        else {
            assert(PyType_Check(base));
            dict = ((PyTypeObject *)base)->tp_dict;
        }
        assert(dict && PyDict_Check(dict));
        res = PyDict_GetItem(dict, name);
        if (res != NULL)
            break;
    }

    ...
    return res;
}

_PyObject_GenericGetAttrWithDict中先调用_PyType_Lookup,在Foo的tp_mro中查到__class__属性(来自Foo的基类object),该属性是一个data descriptor,最终调用了object_get_class。

typeobject.c

static PyObject *
object_get_class(PyObject *self, void *closure)
{
    Py_INCREF(Py_TYPE(self));
    return (PyObject *)(Py_TYPE(self));
}

从object_get_class函数可以看出,对于f = Foo(); f.__class__;来说也是返回的Foo对象的ob_type。这就解释了Case 1中为什么type(b)和b.__class__是相等的。

那为什么Case 2中的type(a)和a.__class__不相等呢?

因为Case 1中没有自定义__class__,所以查找__class__时在Bar中没找到,接着就去Bar的基类object中找,正好object中定义了一个__class__的 data descriptor, 就返回这个。

而在Case 2中我们自定义了__class__,所以在ASML.__dict__中找到有这个 attribute 后就返回了,不会再去找mro中的下一个(object)。但是这里找到的这个 attribute 不是 data descriptor,根据前面提到的 attribute 搜索顺序,我们接着在a.__dict__中找,也没有,那就直接返回在ASML中找到的那个了。

为什么设置__class__后,Case 1和Case 2有差别

obj.xxx = yyy 设置attribute时的顺序
1. 先从type对象及其基类的__dict__中查找该 attribute。如果找到的是 data descriptor,则用该 data descriptor的__set__来设置
2. 否则添加到obj.__dict__

Case 1中得到的__class__是一个 data descriptor,给它赋值实际上调用的是object_set_class函数。

typeobject.c

static int
object_set_class(PyObject *self, PyObject *value, void *closure)
{
    PyTypeObject *oldto = Py_TYPE(self);
    PyTypeObject *newto;

    ...
    newto = (PyTypeObject *)value;
    ...
    if (compatible_for_assignment(newto, oldto, "__class__")) {
        Py_INCREF(newto);
        Py_TYPE(self) = newto;
        Py_DECREF(oldto);
        return 0;
    }
    else {
        return -1;
    }
}

从该函数的实现可以看到b.__class__ = Foo实际上会把b的ob_type设为Foo。所以赋值后type(b)和b.__class__都跟着变了。

>>> b.__dict__
{}
>>> Bar.__dict__
dict_proxy({'__dict__': <attribute '__dict__' of 'Bar' objects>, '__module__': '__main__', '__weakref__': <attribute '__weakref__' of 'Bar' objects>, '__doc__': None})
>>>
>>> b.__class__ = Foo
>>>
>>> b.__class__, type(b)
(<class '__main__.Foo'>, <class '__main__.Foo'>)
>>> b.__dict__
{}
>>> Bar.__dict__
dict_proxy({'__dict__': <attribute '__dict__' of 'Bar' objects>, '__module__': '__main__', '__weakref__': <attribute '__weakref__' of 'Bar' objects>, '__doc__': None})

Case 2中得到的__class__不是 data descriptor。所以a.__class__ = Brion会在a.__dict__中添加一条记录,而a的ob_type不会变,ASML.__dict__也不会变。

>>> a.__dict__
{}
>>> ASML.__dict__
dict_proxy({'__dict__': <attribute '__dict__' of 'ASML' objects>, '__module__': '__main__', '__weakref__': <attribute '__weakref__' of 'ASML' objects>, '__class__': <class '__main__.Foo'>, '__doc__': None})
>>>
>>> a.__class__ = Brion
>>>
>>> a.__dict__
{'__class__': <class '__main__.Brion'>}
>>> ASML.__dict__
dict_proxy({'__dict__': <attribute '__dict__' of 'ASML' objects>, '__module__': '__main__', '__weakref__': <attribute '__weakref__' of 'ASML' objects>, '__class__': <class '__main__.Foo'>, '__doc__': None})

以上的讨论都是基于new style class。对于old style class来说, type()不等于__class__:

>>> class A():
    pass

>>> a = A()
>>> a.__class__, type(a)
(<class __main__.A at 0x0270CFB8>, <type 'instance'>)

另外一个关于 isinstance(obj, cls) 的问题

>>> class Foo(object):
    pass
    
>>> class ASML(object):
    __class__ = Foo
    
>>> a = ASML()
>>> isinstance(a, Foo)
True
>>> isinstance(a, ASML)
True

为什么这里的两个isinstance都返回True呢?

object.h

#define Py_TYPE(ob)             (((PyObject*)(ob))->ob_type)

bltinmodule.c

static PyMethodDef builtin_methods[] = {
    ...
    {"isinstance",  builtin_isinstance, METH_VARARGS, isinstance_doc},
    ...
};

static PyObject *
builtin_isinstance(PyObject *self, PyObject *args)
{
    PyObject *inst;
    PyObject *cls;
    int retval;

    if (!PyArg_UnpackTuple(args, "isinstance", 2, 2, &inst, &cls))
        return NULL;

    retval = PyObject_IsInstance(inst, cls);
    if (retval < 0)
        return NULL;
    return PyBool_FromLong(retval);
}

最终isinstance(obj, cls)会调用PyObject_IsInstance

abstract.c

int
PyObject_IsInstance(PyObject *inst, PyObject *cls) // Yun: "isinstance(obj, cls)" will call it
{
    static PyObject *name = NULL;

    /* Quick test for an exact match */
    if (Py_TYPE(inst) == (PyTypeObject *)cls)      // Yun: "isinstance(a, ASML)" returns True
        return 1;

    ...
    if (!(PyClass_Check(cls) || PyInstance_Check(cls))) {
        PyObject *checker;
        checker = _PyObject_LookupSpecial(cls, "__instancecheck__", &name);
        ...
        res = PyObject_CallFunctionObjArgs(checker, inst, NULL);  // Yun: "isinstance(a, Foo)" calls recursive_isinstance() and returns True
        if (res != NULL) {
            ok = PyObject_IsTrue(res);
            ...
        }
        return ok;
    }
    return recursive_isinstance(inst, cls);        
}

static int
recursive_isinstance(PyObject *inst, PyObject *cls)
{
    ...
    static PyObject *__class__ = NULL;
    int retval = 0;
    if (__class__ == NULL) {
        __class__ = PyString_InternFromString("__class__");
        if (__class__ == NULL)
            return -1;
    }
    ...

    if (PyClass_Check(cls) && PyInstance_Check(inst)) {
        ...
    }
    else if (PyType_Check(cls)) {
        retval = PyObject_TypeCheck(inst, (PyTypeObject *)cls);
        if (retval == 0) {
            PyObject *c = PyObject_GetAttr(inst, __class__);
            ...
            retval = PyType_IsSubtype(
                    (PyTypeObject *)c,
                    (PyTypeObject *)cls);      // Yun: Both "c" and "cls" are "Foo" here
            ...
        }
    }
    ...

    return retval;
}

__instancecheck__最终会调用recursive_isinstance:

typeobject.c

static PyMethodDef type_methods[] = {
    {"mro", (PyCFunction)mro_external, METH_NOARGS,
     PyDoc_STR("mro() -> list\nreturn a type's method resolution order")},
    {"__subclasses__", (PyCFunction)type_subclasses, METH_NOARGS,
     PyDoc_STR("__subclasses__() -> list of immediate subclasses")},
    {"__instancecheck__", type___instancecheck__, METH_O,
     PyDoc_STR("__instancecheck__() -> bool\ncheck if an object is an instance")},
    {"__subclasscheck__", type___subclasscheck__, METH_O,
     PyDoc_STR("__subclasscheck__() -> bool\ncheck if a class is a subclass")},
    {0}
};

static PyObject *
type___instancecheck__(PyObject *type, PyObject *inst)
{
    switch (_PyObject_RealIsInstance(inst, type)) {
    case -1:
        return NULL;
    case 0:
        Py_RETURN_FALSE;
    default:
        Py_RETURN_TRUE;
    }
}

int
_PyObject_RealIsInstance(PyObject *inst, PyObject *cls)
{
    return recursive_isinstance(inst, cls);
}

简单来说就是isinstance(obj, cls)会先看ob_type,然后再看__class__。



-EOF-
含瀚家的老袁,转载请注明出处