pool None count query path regex web ledin html 012 futs def task_iter

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# Filter passed to remote_db.file.find(); currently empty.
# A narrower filter that was used previously:
#   {'path': {'$regex': '^/web/ledin/html/0[012]/'}}
query = dict()
# Futures of the chunk currently being processed.
futs = list()
# Running task counter used to decide when the pool is recreated.
count = 0
# The process pool is created lazily by the main loop.
pool = None
def task_iterator():
    """Yield each document returned by ``remote_db.file.find(query, ...)``.

    The cursor is requested with ``no_cursor_timeout=True``, so it is closed
    explicitly once iteration finishes or the generator is discarded; this
    avoids leaving it open if the consumer stops early.

    NOTE(review): assumes ``remote_db`` is a pymongo database (the ``find``
    keyword arguments match that API, where ``limit=0`` means "no limit") and
    that the returned cursor exposes ``close()`` -- confirm.
    """
    cursor = remote_db.file.find(query, limit=0, no_cursor_timeout=True)
    try:
        for item in cursor:
            yield item
    finally:
        # Runs on normal exhaustion, on generator.close() and on errors.
        cursor.close()
# Main driver: pull records from task_iterator() and run process_file() on
# them in chunks of POOL_TASK_CHUNK inside a process pool. The pool is
# recreated whenever `count` is a multiple of POOL_RESET_PERIOD.
task_iter = task_iterator()
count = 0  # bumped before every fetch attempt, including the one that fails
try:
    while True:
        if pool is None or not count % POOL_RESET_PERIOD:
            if pool is not None:
                # All futures of the previous chunk were already collected
                # below, so this just releases the old worker processes
                # instead of leaking them.
                pool.shutdown()
            pool = ProcessPoolExecutor()
            print('Pool reseted')

        futs = []
        fut_paths = {}  # future -> path, so failures can be logged with it
        for _ in range(POOL_TASK_CHUNK):
            count += 1
            try:
                item = next(task_iter)
            except StopIteration:
                # Source exhausted: process what was submitted so far.
                break
            fut = pool.submit(process_file, item['path'], item['_id'])
            futs.append(fut)
            fut_paths[fut] = item['path']
        print('Submited %d tasks into pool' % len(futs))

        # Collect results in submission order; one failing task must not
        # stop the rest of the chunk.
        for fut in futs:
            try:
                comp = fut.result()
            except Exception as ex:
                # Pass the path so the '%s' placeholder is actually filled.
                error_logger.error('URL: %s', fut_paths[fut], exc_info=ex)
                stat.inc('error')
            else:
                stat.inc()
                stat.inc('gziped-%s' % ('yes' if comp['gziped'] else 'no'))

        if not futs:
            print('No more tasks in task iterator')
            break
finally:
    # Release the last pool's workers, including when the loop aborts.
    if pool is not None:
        pool.shutdown()