# -*- coding: cp1251 -*-
"""Query wordstat.yandex.ru for a set of phrases and save the frequent hits.

The phrases are the keys of the JSON object stored in ``yande.txt``.  For each
phrase one HTTP request is made; every ``(text, number)`` pair matched on the
returned page whose number is greater than ``MIN_COUNT`` is collected.  The
merged result is appended as one JSON document to ``yandex-result.txt``.

This is Python 2 code (``thread``, ``StringIO``, ``urllib.urlencode``,
``xrange``).
"""
import pickle
import re
import StringIO
import sys
import thread
import threading
import urllib

import pycurl
import simplejson

INPUT_PATH = "yande.txt"
OUTPUT_PATH = "yandex-result.txt"
BASE_URL = "http://wordstat.yandex.ru/?%s"
WORKER_COUNT = 100
MIN_COUNT = 300

# Compiled once instead of once per phrase.
# NOTE(review): the pattern contains no markup at all and looks as if HTML
# tags were stripped from it at some point -- verify against a real page.
ROW_PATTERN = re.compile(r'\s+(.*?)\s+\s+([\d.]+)', re.S)

with open(INPUT_PATH, 'r') as fi:
    items = simplejson.load(fi)

# Shared work queue: workers pop phrases from it until it is empty.
itemss = list(items.keys())

# Merged results of all workers (matched text -> matched number string).
d = {}
# Number of workers that have been started and have not finished yet.
count_thread = 0
# Guards ``d`` and ``count_thread``.
_state_lock = threading.Lock()


def _fetch(key):
    """Return the response body for phrase *key*.

    A transfer error is reported on stderr and whatever was received before
    the failure is still returned, so one bad request never stops a worker.
    """
    params = urllib.urlencode({'text': key.encode('utf8')})
    url = BASE_URL % params
    data = StringIO.StringIO()
    curl = pycurl.Curl()
    try:
        curl.setopt(pycurl.FOLLOWLOCATION, 0)
        curl.setopt(pycurl.CONNECTTIMEOUT, 30)
        curl.setopt(pycurl.URL, url)
        curl.setopt(pycurl.WRITEFUNCTION, data.write)
        try:
            curl.perform()
        except pycurl.error as exc:
            sys.stderr.write("request failed for %r: %s\n" % (key, exc))
    finally:
        curl.close()
    return data.getvalue()


def _parse(page):
    """Return ``{text: number}`` for matches whose number exceeds MIN_COUNT.

    The number is kept as the matched string.  Matches whose number is not a
    plain integer (the pattern also admits dots) are skipped rather than
    letting ``int()`` raise and kill the worker.
    """
    found = {}
    for text, number in ROW_PATTERN.findall(page):
        try:
            value = int(number)
        except ValueError:
            continue
        if value > MIN_COUNT:
            found[text] = number
    return found


def some_function():
    """Worker: drain ``itemss``, merging each page's matches into ``d``.

    ``count_thread`` is decremented when the worker exits, even if it exits
    because of an unexpected exception.
    """
    global count_thread
    try:
        while True:
            # pop() is the only atomic step; a separate emptiness check would
            # race with the other workers.
            try:
                key = itemss.pop()
            except IndexError:
                break
            found = _parse(_fetch(key))
            with _state_lock:
                # Merge instead of rebinding ``d`` so earlier results survive.
                d.update(found)
    finally:
        with _state_lock:
            count_thread -= 1


workers = []
for _ in xrange(0, WORKER_COUNT):
    with _state_lock:
        count_thread += 1
    worker = threading.Thread(target=some_function)
    worker.start()
    workers.append(worker)

# Block until every worker is done instead of spinning on the counter.
for worker in workers:
    worker.join()

with open(OUTPUT_PATH, 'a+') as f:
    f.write(simplejson.dumps(d))