1 |
1858
|
aaronmk
|
# Parallel processing
|
2 |
|
|
|
3 |
1873
|
aaronmk
|
import cPickle
|
4 |
1877
|
aaronmk
|
import itertools
|
5 |
1862
|
aaronmk
|
import Queue
|
6 |
1877
|
aaronmk
|
import rand
|
7 |
1873
|
aaronmk
|
import types
|
8 |
1858
|
aaronmk
|
import warnings
|
9 |
|
|
|
10 |
1877
|
aaronmk
|
import collection
|
11 |
|
|
import dicts
|
12 |
1873
|
aaronmk
|
import exc
|
13 |
|
|
from Runnable import Runnable
|
14 |
|
|
|
15 |
|
|
def try_pickle(value):
|
16 |
|
|
try: cPickle.dumps(value)
|
17 |
|
|
except Exception, e:
|
18 |
|
|
exc.add_msg(e, 'Tried to pickle: '+repr(value))
|
19 |
|
|
raise
|
20 |
|
|
|
21 |
1877
|
aaronmk
|
def prepickle(value, vars_id_dict_):
    '''Recursively replace values registered in vars_id_dict_ with their id()s
    so they can cross a process boundary without actually being pickled.
    post_unpickle() performs the reverse mapping on the other side.'''
    def replace(node, is_leaf):
        key = id(node)
        if key in vars_id_dict_:
            return key
        # Pickle leaves eagerly: a failure here produces a full traceback,
        # which is not provided with pickling errors in multiprocessing's Pool.
        if is_leaf:
            try_pickle(node)
        return node
    return collection.rmap(replace, value)
|
30 |
|
|
|
31 |
|
|
def post_unpickle(value, vars_id_dict_):
    '''Recursively restore shared values that prepickle() replaced by id().'''
    def restore(node, is_leaf):
        # Ints may be id()s of shared values; get() returns the int itself
        # when it isn't a known id().
        if type(node) == int:
            return vars_id_dict_.get(node, node)
        return node
    return collection.rmap(restore, value)
|
37 |
|
|
|
38 |
1862
|
aaronmk
|
class SyncPool:
    '''A dummy synchronous Pool to use if multiprocessing is not available.
    Runs each task immediately in the calling process/thread.'''
    def __init__(self, processes=None): pass
    
    class Result:
        '''Mimics multiprocessing's AsyncResult for an already-computed value'''
        def __init__(self, value): self.value = value
        
        # NOTE(fix): these methods were missing `self`, so instance calls
        # raised NameError (get) or TypeError (ready/successful).
        def get(self, timeout=None): return self.value
        
        def wait(self, timeout=None): pass
        
        def ready(self): return True # the task already ran synchronously
        
        def successful(self): return True # TODO: False if raised exception
    
    def apply_async(self, func, args=(), kw_args={}, callback=None):
        '''Run func(*args, **kw_args) immediately and invoke callback with its
        return value.
        @return a Result wrapping the already-computed value'''
        if callback == None: callback = lambda v: None
        
        value = func(*args, **kw_args)
        callback(value)
        return self.Result(value)
|
59 |
1858
|
aaronmk
|
|
60 |
1862
|
aaronmk
|
class MultiProducerPool:
|
61 |
|
|
'''A multi-producer pool. You must call pool.main_loop() in the thread that
|
62 |
|
|
created this to process new tasks.'''
|
63 |
1873
|
aaronmk
|
|
64 |
1885
|
aaronmk
|
def __init__(self, processes=None, locals_={}, *shared):
|
65 |
1862
|
aaronmk
|
'''
|
66 |
|
|
@param processes If 0, uses SyncPool
|
67 |
|
|
@post The # processes actually used is made available in self.process_ct
|
68 |
|
|
'''
|
69 |
|
|
try:
|
70 |
|
|
if processes == 0: raise ImportError('turned off')
|
71 |
|
|
import multiprocessing
|
72 |
|
|
import multiprocessing.pool
|
73 |
|
|
except ImportError, e:
|
74 |
|
|
warnings.warn(UserWarning('Not using parallel processing: '+str(e)))
|
75 |
|
|
processes = 1
|
76 |
|
|
Pool_ = SyncPool
|
77 |
|
|
Queue_ = Queue.Queue
|
78 |
|
|
else:
|
79 |
|
|
if processes == None: processes = multiprocessing.cpu_count()
|
80 |
|
|
Pool_ = multiprocessing.pool.Pool
|
81 |
|
|
Queue_ = multiprocessing.Queue
|
82 |
|
|
|
83 |
|
|
self.process_ct = processes
|
84 |
|
|
self.pool = Pool_(processes)
|
85 |
|
|
self.queue = Queue_()
|
86 |
1885
|
aaronmk
|
self.active_tasks = 0
|
87 |
|
|
|
88 |
1877
|
aaronmk
|
# Values that may be pickled by id()
|
89 |
1885
|
aaronmk
|
self.vars_id_dict = dicts.IdDict()
|
90 |
|
|
self.share(self, *shared).share_vars(locals_).share_vars(globals())
|
91 |
1862
|
aaronmk
|
|
92 |
1885
|
aaronmk
|
def share(self, *values):
|
93 |
|
|
'''Call this on all values that that should be pickled by id()'''
|
94 |
|
|
self.vars_id_dict.add(*values)
|
95 |
|
|
return self
|
96 |
1877
|
aaronmk
|
|
97 |
1885
|
aaronmk
|
def share_vars(self, vars_):
|
98 |
|
|
'''Call this on all vars that that should be pickled by id().
|
99 |
|
|
Usage: self.share_vars(locals())
|
100 |
|
|
'''
|
101 |
|
|
self.vars_id_dict.add_vars(vars_)
|
102 |
|
|
return self
|
103 |
|
|
|
104 |
1862
|
aaronmk
|
def main_loop(self):
|
105 |
1885
|
aaronmk
|
'''Prime the task queue with at least one task before calling this'''
|
106 |
|
|
while True:
|
107 |
|
|
try: call = self.queue.get(timeout=0.1) # sec
|
108 |
|
|
except Queue.Empty:
|
109 |
|
|
if self.active_tasks == 0: break # program done
|
110 |
|
|
else: continue
|
111 |
|
|
|
112 |
|
|
def handle_result(*args, **kw_args):
|
113 |
|
|
self.active_tasks -= 1
|
114 |
|
|
if call.callback != None: call.callback(*args, **kw_args)
|
115 |
|
|
|
116 |
|
|
self.active_tasks += 1
|
117 |
|
|
self.pool.apply_async(call.func, self.post_unpickle(call.args),
|
118 |
|
|
self.post_unpickle(call.kw_args), handle_result)
|
119 |
1862
|
aaronmk
|
|
120 |
|
|
class Result:
|
121 |
|
|
def get(timeout=None): raise NotImplementedError()
|
122 |
|
|
|
123 |
|
|
def wait(timeout=None): raise NotImplementedError()
|
124 |
|
|
|
125 |
|
|
def ready(): raise NotImplementedError()
|
126 |
|
|
|
127 |
|
|
def successful(): raise NotImplementedError()
|
128 |
|
|
|
129 |
1877
|
aaronmk
|
def apply_async(self, func, args=(), kw_args={}, callback=None):
|
130 |
|
|
assert callback == None, 'Callbacks not supported'
|
131 |
1873
|
aaronmk
|
|
132 |
1877
|
aaronmk
|
call = Runnable(func, *self.prepickle(args), **self.prepickle(kw_args))
|
133 |
1873
|
aaronmk
|
call.callback = callback # store this inside the Runnable
|
134 |
|
|
|
135 |
1862
|
aaronmk
|
self.queue.put_nowait(call)
|
136 |
1863
|
aaronmk
|
return self.Result()
|
137 |
1877
|
aaronmk
|
|
138 |
|
|
def prepickle(self, value): return prepickle(value, self.vars_id_dict)
|
139 |
|
|
|
140 |
|
|
def post_unpickle(self, value):
|
141 |
|
|
return post_unpickle(value, self.vars_id_dict)
|