Python thread local

　　由于GIL的原因，笔者在日常开发中几乎没有用到python的多线程。如果需要并发，一般使用多进程，对于IO Bound这种情况，使用协程也是不错的主意。但是在python很多的网络库中，都支持多线程，基本上都会使用到threading.local。在python中threading.local用来表示线程相关的数据，线程相关指的是这个属性在各个线程中是独立的、互不影响的，先来看一个最简单的例子：

 class Widgt(object):
     """Empty plain class whose instances accept arbitrary attributes."""

 import threading

 def test():
     """Show that attributes set on a ``threading.local`` are per-thread.

     Sets ``x`` in the calling thread, then runs a worker thread that prints
     whether it can see ``x`` and assigns its own value. Finally prints the
     value of ``x`` as seen by the calling thread.
     """
     local_data = threading.local()
     # Uncomment the next line to use a plain object instead of a
     # thread-local one.
     # local_data = Widgt()
     local_data.x = 1

     def thread_func():
         visible = hasattr(local_data, 'x')
         print('Has x in new thread: %s' % visible)
         local_data.x = 2

     worker = threading.Thread(target=thread_func)
     worker.start()
     worker.join()
     print('x in pre thread is %s' % local_data.x)


 if __name__ == '__main__':
     test()

输出：

Has x in new thread: False

x in pre thread is 1

　　可以看到，在新的线程中 local_data 并没有x属性，并且在新线程中的赋值并不会影响到其他线程。也可以稍微改改代码，去掉 “# local_data = Widgt()” 这一行的注释，local_data就变成了线程共享的变量。

　　local是怎么实现的呢？在threading.py中，相关代码如下：

 # Prefer the ``_local`` class exported by the ``thread`` module, bound here
 # under the name ``local``.
 try:

     from thread import _local as local

 except ImportError:

     # ``thread._local`` could not be imported: fall back to the ``local``
     # class provided by the ``_threading_local`` module.
     from _threading_local import local

　　可以看到，local是python的builtin class，同时也提供了一个纯python版本的参考实现，在_threading_local.py中，我们来看看代码（代码不全，省略了几个函数）：

 class _localbase(object):
     """Base class that sets up the bookkeeping slots of a thread-local object.

     ``__new__`` stores a key, the constructor arguments and an ``RLock`` in
     the three slots, then registers the new instance's ``__dict__`` in the
     creating thread's ``__dict__`` under that key.
     """

     __slots__ = '_local__key', '_local__args', '_local__lock'

     def __new__(cls, *args, **kw):
         """Create the instance and register its dict with the current thread.

         Raises:
             TypeError: if arguments are given but ``cls`` does not define
                 its own ``__init__``.
         """
         self = object.__new__(cls)

         # The key is derived from id(self), so every thread that touches
         # this instance computes the same key.
         key = ('_local__key', 'thread.local.' + str(id(self)))
         object.__setattr__(self, '_local__key', key)
         object.__setattr__(self, '_local__args', (args, kw))
         # Re-entrant lock.
         object.__setattr__(self, '_local__lock', RLock())

         has_init_args = bool(args or kw)
         if has_init_args and cls.__init__ is object.__init__:
             raise TypeError("Initialization arguments are not supported")

         # Register the dict for the creating thread now, in anticipation of
         # __init__ being called, so that __init__ is not called again later
         # for this thread.
         instance_dict = object.__getattribute__(self, '__dict__')
         # current_thread() is a per-thread object; store the dict under key.
         current_thread().__dict__[key] = instance_dict
         return self

 def _patch(self):
     """Point ``self.__dict__`` at the calling thread's dict for this object.

     Looks up the dict stored in ``current_thread().__dict__`` under the
     object's ``_local__key``. If none is found, a new empty dict is created
     and registered, and a user-defined ``__init__`` (if any) is re-run with
     the saved constructor arguments.
     """
     key = object.__getattribute__(self, '_local__key')
     # current_thread() returns a different object in each thread.
     thread_attrs = current_thread().__dict__
     thread_dict = thread_attrs.get(key)

     first_use = thread_dict is None
     if first_use:
         # First access from this thread: start from an empty dict.
         thread_dict = {}
         thread_attrs[key] = thread_dict

     # Swap the instance __dict__ for this thread's own dict.
     object.__setattr__(self, '__dict__', thread_dict)
     if not first_use:
         return

     # We have a new instance dict, so call __init__ if the class has one.
     cls = type(self)
     if cls.__init__ is not object.__init__:
         args, kw = object.__getattribute__(self, '_local__args')
         cls.__init__(self, *args, **kw)

 class local(_localbase):
     """Object whose attribute reads are served from the calling thread's dict.

     Only ``__getattribute__`` is shown in this excerpt.
     """

     def __getattribute__(self, name):
         """Return attribute ``name`` after switching to this thread's dict."""
         lock = object.__getattribute__(self, '_local__lock')
         # ``with`` acquires the lock and releases it on every exit path,
         # replacing the manual acquire / try / finally / release sequence.
         with lock:
             # After this call self.__dict__ is the calling thread's dict.
             _patch(self)
             return object.__getattribute__(self, name)

　　代码中已经加入了注释，便于理解。总结就是，在每个线程中增加一个独立的dict（通过current_thread()这个线程独立的对象），然后每次对local实例增删改查的时候，进行__dict__的替换。我们看看测试代码：

 import threading

 from _threading_local import local

 def test():
     """Show where ``_threading_local.local`` stores a thread's data.

     Prints the id of the local object, then runs a worker thread that
     assigns ``local_data.x`` and prints the entries which that assignment
     added to ``threading.current_thread().__dict__``. Finally prints ``x``
     as seen by the calling thread.
     """
     local_data = local()
     local_data.x = 1
     # A single %-formatted argument prints the same text whether ``print``
     # is a statement or a function.
     print('id of local_data %s' % id(local_data))

     def thread_func():
         thread_attrs = threading.current_thread().__dict__
         # Take a real snapshot of the keys: a live ``keys()`` view would
         # also contain the key added below and hide the difference.
         before_keys = set(thread_attrs)
         local_data.x = 2
         print([(k, v) for (k, v) in thread_attrs.items()
                if k not in before_keys])

     t = threading.Thread(target=thread_func)
     t.start()
     t.join()
     print('x in pre thread is %s' % local_data.x)


 if __name__ == '__main__':
     test()

输出：

　　id of local_data 40801456
　　[(('_local__key', 'thread.local.40801456'), {'x': 2})]

　　从输出可以看到，在这次运行中，local_data的id是40801456，在每个线程中都是一样的。在新的线程（thread_func函数）中访问local_data对象之前，current_thread()返回的对象的__dict__中是没有 ('_local__key', 'thread.local.40801456') 这个key的，在执行 local_data.x = 2 这一行、访问local_data的时候才会增加这个key（在_patch函数中）。

　　在gevent中，也有一个类叫local，其作用是提供协程独立的数据。PS：gevent中提供了几乎与python原生线程一样的数据结构，如Event、Semaphore、Local，而且，gevent的代码和文档中也把协程称为“thread”，这点需要注意。gevent.local的实现借鉴了上面介绍的_threading_local.py，区别在于，_threading_local.local 将线程独立的数据存放在current_thread()中，而gevent.local将协程独立的数据存放在greenlet.getcurrent()中。

　　最后，如果在代码中使用了gevent.monkey.patch_all()，那么python原生的threading.local将会被替换成gevent.local.local。之前在看bottle的代码的时候，发现里面都是使用的threading.local，当时也对monkey_patch具体patch了哪些模块不了解，于是就想如果使用gevent是否会出错呢，结果测试了很久都发现没问题，直到重新细看bottle源码才发现原因所在。代码如下：

 class GeventServer(ServerAdapter):
     """ Untested. Options:

         * See gevent.wsgi.WSGIServer() documentation for more options.
     """

     def run(self, handler):
         """Serve ``handler`` with ``gevent.pywsgi.WSGIServer``.

         Raises:
             RuntimeError: if ``threading.local()`` does not produce a
                 ``gevent.local.local`` instance.
         """
         from gevent import pywsgi, local
         # NOTE: this is the check to look at. It refuses to run unless
         # threading.local() already yields a gevent.local.local instance.
         if not isinstance(threading.local(), local.local):
             msg = "Bottle requires gevent.monkey.patch_all() (before import)"
             raise RuntimeError(msg)
         if self.quiet:
             self.options['log'] = None
         address = (self.host, self.port)
         server = pywsgi.WSGIServer(address, handler, **self.options)
         if 'BOTTLE_CHILD' in os.environ:
             import signal
             # Stop the server when SIGINT is received.
             signal.signal(signal.SIGINT, lambda s, f: server.stop())
         server.serve_forever()

　　这个小插曲其实也反映了monkey-patch的一些优势与劣势。其优势在于不对源码修改就能改变运行时行为，提高性能；其劣势在于，对于缺乏经验或者对patch细节不了解的人来说，会带来静态代码与运行结果之间的认知差异。

references：

bottle.py源码

gevent tutorial

秒客网

Python thread local

相关文章