Project

General

Profile

1
# Parallel processing
2

    
3
import cPickle
4
import itertools
5
import Queue
6
import rand
7
import types
8
import warnings
9

    
10
import collection
11
import dicts
12
import exc
13
from Runnable import Runnable
14

    
15
def try_pickle(value):
    '''Checks that a value can be pickled by actually pickling it.
    
    The pickled output is discarded; only success or failure matters.
    
    @param value The object to test
    @return None if the value pickled successfully
    @raise Exception Whatever cPickle.dumps() raised, re-raised after
        exc.add_msg() has been called on it with the repr() of the value
    '''
    try:
        cPickle.dumps(value)
    except Exception as e: # `as` form is valid on Python 2.6+ and 3.x
        exc.add_msg(e, 'Tried to pickle: '+repr(value))
        raise # bare raise, so the original traceback is kept
20

    
21
def vars_id_dict(locals_, globals_, *misc):
    '''Usage: vars_id_dict(locals(), globals(), misc...)
    
    Builds one list holding the values() of each namespace followed by every
    extra argument, and hands that list to dicts.id_dict().
    
    @param locals_ A namespace dict, typically the caller's locals()
    @param globals_ A namespace dict, typically the caller's globals()
    @param misc Extra items appended after the two values() entries
    @return Whatever dicts.id_dict() returns for the combined list
    '''
    combined = [scope.values() for scope in (locals_, globals_)]
    combined.extend(misc)
    return dicts.id_dict(combined)
25

    
26
def prepickle(value, vars_id_dict_):
    '''Prepares a value for pickling by swapping unpicklable items for their
    id(), provided that id() is a key of vars_id_dict_.
    
    @param value The value to prepare; it is passed through collection.rmap()
        (presumably so nested items are each filtered -- confirm against
        collection.rmap)
    @param vars_id_dict_ Dict whose keys are the id()s of the objects that are
        allowed to be sent by id
    @return The result of collection.rmap() over value
    @raise Exception The pickling error, if an item cannot be pickled and its
        id() is not in vars_id_dict_
    '''
    def filter_(item):
        # Try pickling the item. If it fails, we'll get a full traceback here,
        # which is not provided with pickling errors in multiprocessing's Pool.
        try:
            try_pickle(item)
        except Exception:
            id_ = id(item)
            # Bare raise instead of `raise e`: on Python 2, `raise e` restarts
            # the traceback at this line and loses the frames that show where
            # pickling actually failed.
            if id_ not in vars_id_dict_: raise
            return id_
        return item
    return collection.rmap(filter_, value)
37

    
38
def post_unpickle(value, vars_id_dict_):
    '''Reverses prepickle(): swaps items that are keys of vars_id_dict_ back
    for the objects stored under them.
    
    NOTE(review): the lookup is by plain value, so an ordinary item that
    happens to equal a registered id() is replaced too.
    
    @param value The value to restore; it is passed through collection.rmap()
    @param vars_id_dict_ Dict mapping id() keys to the shared objects
    @return The result of collection.rmap() over value
    '''
    def filter_(item):
        try: return vars_id_dict_[item] # item is an id()
        # KeyError: not a registered id(). TypeError: item is unhashable, so it
        # cannot be an id() either -- pass it through instead of crashing.
        except (KeyError, TypeError): return item
    return collection.rmap(filter_, value)
43

    
44
class SyncPool:
    '''A dummy synchronous Pool to use if multiprocessing is not available'''
    def __init__(self, processes=None):
        '''@param processes Accepted for signature compatibility; ignored'''
        pass
    
    class Result:
        '''The result of a call that has already run to completion'''
        def __init__(self, value): self.value = value
        
        def get(self, timeout=None):
            '''@return The value the call returned. timeout is ignored.'''
            return self.value
        
        def wait(self, timeout=None):
            '''Returns immediately, since the call has already finished'''
            pass
        
        def ready(self):
            '''@return Always True'''
            return True
        
        def successful(self):
            '''@return Always True'''
            return True # TODO: False if raised exception
    
    def apply_async(self, func, args=(), kw_args=None, callback=None):
        '''Runs func(*args, **kw_args) immediately in the calling thread.
        
        @param func The callable to run
        @param args Positional arguments for func
        @param kw_args Keyword arguments for func; None means no keyword args
        @param callback If not None, called with func's return value
        @return A Result wrapping func's return value
        @raise Exception Anything func or callback raises propagates directly
        '''
        if kw_args is None: kw_args = {}
        
        value = func(*args, **kw_args)
        if callback is not None: callback(value)
        return self.Result(value)
65

    
66
class MultiProducerPool:
    '''A multi-producer pool. You must call pool.main_loop() in the thread that
    created this to process new tasks.
    
    apply_async() only puts the call on self.queue; main_loop() takes calls off
    the queue and submits them to self.pool.
    '''
    
    def __init__(self, processes=None, locals_=None, globals_=None, *shared):
        '''
        @param processes If 0, uses SyncPool. If None and multiprocessing is
            importable, uses multiprocessing.cpu_count().
        @param locals_ Namespace dict passed to vars_id_dict()
        @param globals_ Namespace dict passed to vars_id_dict()
        @param shared Additional values passed to vars_id_dict()
        @post The # processes actually used is made available in self.process_ct
        '''
        # NOTE(review): these defaults are evaluated here, so they are this
        # method's own locals() and this module's globals(), not the caller's.
        if locals_ is None: locals_ = locals()
        if globals_ is None: globals_ = globals()
        
        try:
            # Reuse the ImportError fallback path to turn parallelism off
            if processes == 0: raise ImportError('turned off')
            import multiprocessing
            import multiprocessing.pool
        except ImportError as e:
            warnings.warn(UserWarning('Not using parallel processing: '+str(e)))
            processes = 1
            Pool_ = SyncPool
            Queue_ = Queue.Queue
        else:
            if processes is None: processes = multiprocessing.cpu_count()
            Pool_ = multiprocessing.pool.Pool
            Queue_ = multiprocessing.Queue
        
        self.process_ct = processes
        self.pool = Pool_(processes)
        self.queue = Queue_()
        # Values that may be pickled by id()
        self.vars_id_dict = vars_id_dict(locals_, globals_, *shared)
    
    def share(self, value):
        '''Call this on every value that may be pickled by id()'''
        self.vars_id_dict[id(value)] = value
    
    def main_loop(self):
        '''Submits every queued call to self.pool, returning once the queue is
        found empty.
        
        NOTE(review): with multiprocessing.Queue, an item that was just put may
        not be visible to a non-blocking get() yet -- confirm that callers
        tolerate an early return.
        '''
        try:
            while True:
                # block=False raises Empty immediately if the queue is empty,
                # which indicates that the program is done
                call = self.queue.get(block=False)
                self.pool.apply_async(call.func, self.post_unpickle(call.args),
                    self.post_unpickle(call.kw_args), call.callback)
        except Queue.Empty: pass
    
    class Result:
        '''Placeholder returned by apply_async(); no operation is implemented'''
        def get(self, timeout=None): raise NotImplementedError()
        
        def wait(self, timeout=None): raise NotImplementedError()
        
        def ready(self): raise NotImplementedError()
        
        def successful(self): raise NotImplementedError()
    
    def apply_async(self, func, args=(), kw_args=None, callback=None):
        '''Queues a call for main_loop() to submit to the underlying pool.
        
        @param func The callable to run
        @param args Positional arguments; passed through self.prepickle()
        @param kw_args Keyword arguments; passed through self.prepickle().
            None means no keyword args.
        @param callback Must be None; callbacks are not supported
        @return A Result whose methods all raise NotImplementedError
        '''
        assert callback is None, 'Callbacks not supported'
        if kw_args is None: kw_args = {}
        
        call = Runnable(func, *self.prepickle(args), **self.prepickle(kw_args))
        call.callback = callback # store this inside the Runnable
        
        self.queue.put_nowait(call)
        return self.Result()
    
    def prepickle(self, value):
        '''Applies the module-level prepickle() using self.vars_id_dict'''
        return prepickle(value, self.vars_id_dict)
    
    def post_unpickle(self, value):
        '''Applies the module-level post_unpickle() using self.vars_id_dict'''
        return post_unpickle(value, self.vars_id_dict)
(17-17/31)