pool None for item in db file find limit 100000 no_cursor_timeout True

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
# Walk up to 100000 documents from db.file and run process_file on each one in
# a single-worker process pool, merging the first element of every entry in
# the result's 'extra_fields' into the running `extra_fields` collection.
#
# The pool is recreated on the first item and again whenever `count` is a
# multiple of 10000. `count`, `stat`, `extra_fields`, `process_file` and
# `error_logger` are expected to be defined earlier in the file.
pool = None
cursor = db.file.find({}, limit=100000, no_cursor_timeout=True)
try:
    for item in cursor:
        count += 1
        stat.tick()
        if pool is None or not count % 10000:
            if pool is not None:
                # Release the previous worker process before replacing it;
                # otherwise every reset leaks a process and its helper thread.
                pool.shutdown()
            pool = ProcessPoolExecutor(max_workers=1)
            print('Pool reseted')
        try:
            fut = pool.submit(process_file, item['path'], item['_id'])
        except Exception as ex:
            # Report the actual exception; the original logged item['_id']
            # in the "error" slot and dropped `ex` entirely.
            error_logger('URL: %s, error: %s' % (item['url'], ex))
        else:
            # NOTE(review): fut.result() re-raises anything process_file
            # raised in the worker, and that is NOT caught here (same as the
            # original). Confirm whether it should be logged instead.
            comp = fut.result()
            extra_fields = extra_fields.union(
                [x[0] for x in comp['extra_fields']]
            )
finally:
    # A no_cursor_timeout cursor is never reaped by the server, so close it
    # explicitly even when the loop exits early on an exception.
    cursor.close()
    if pool is not None:
        pool.shutdown()