coding cp1251 import sys import thread import pycurl import StringIO i

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# -*- coding: cp1251 -*-
import pickle
import re
import StringIO
import sys
import thread
import time
import urllib

import pycurl
import simplejson
# Load the keyword table: a JSON object whose keys are the phrases to query.
fi = open("yande.txt", 'r')
try:
    items = simplejson.load(fi)
finally:
    # Release the handle even when the file does not contain valid JSON.
    fi.close()

# Shared work queue: worker threads pop keys off this list until it is empty.
itemss = list(items.keys())
# Serializes updates to the shared globals ``count_thread`` and ``d``.
# ``count_thread -= 1`` is a read-modify-write, so without a lock two workers
# finishing at the same time can lose a decrement and leave the waiting main
# thread spinning forever.
_state_lock = thread.allocate_lock()

# Phrase -> count (kept as the matched string), accumulated over every
# keyword and every worker.  Defined here so it exists even when no keyword
# is ever processed.
d = {}

# One row of the result table: captures the link text and the contents of the
# right-aligned cell.  Compiled once instead of once per request.
_ROW_PATTERN = re.compile(
    r'<td>\s+<a href="\?page[^"]+">(.*?)</a>\s+</td>\s+<td align="right">([\d.]+)</td>',
    re.S)


def some_function():
    """Worker loop: drain ``itemss`` and collect matching rows into ``d``.

    For every keyword popped from the shared ``itemss`` list, the page
    ``http://wordstat.yandex.ru/?text=<keyword>`` is downloaded with pycurl
    and scanned with ``_ROW_PATTERN``.  Rows whose number is greater than 300
    are remembered.  When the worker stops (normally or because of an
    exception) its rows are merged into the global ``d`` and the global
    ``count_thread`` is decremented by one, both under ``_state_lock``.

    Takes no arguments and returns nothing; results are communicated only
    through the globals ``d`` and ``count_thread``.
    """
    global count_thread
    found = {}
    try:
        while True:
            # pop() and handle IndexError instead of "check, then pop":
            # another worker may take the last keyword between the two steps.
            try:
                key = itemss.pop()
            except IndexError:
                break

            params = urllib.urlencode({'text': key.encode('utf8')})
            url = "http://wordstat.yandex.ru/?%s" % params
            data = StringIO.StringIO()
            curl = pycurl.Curl()
            try:
                curl.setopt(pycurl.FOLLOWLOCATION, 0)
                curl.setopt(pycurl.CONNECTTIMEOUT, 30)
                curl.setopt(pycurl.URL, url)
                curl.setopt(pycurl.WRITEFUNCTION, data.write)
                try:
                    curl.perform()
                except pycurl.error:
                    # Best effort: report the failure and still scan whatever
                    # part of the body was received before the error.
                    sys.stderr.write("request failed for %s: %s\n"
                                     % (url, sys.exc_info()[1]))
            finally:
                # Always free the handle, including when setopt() raises.
                curl.close()

            for phrase, count in _ROW_PATTERN.findall(data.getvalue()):
                # NOTE(review): the pattern also admits dots in the number;
                # int() raises ValueError for such a value, which ends this
                # worker (its rows so far are still merged below).  Confirm
                # what a dotted value means before changing the conversion.
                if int(count) > 300:
                    found[phrase] = count
    finally:
        # Runs on every exit path so the waiting main thread is always
        # released; merging before the decrement guarantees ``d`` is complete
        # once ``count_thread`` reaches zero.
        _state_lock.acquire()
        try:
            d.update(found)
            count_thread -= 1
        finally:
            _state_lock.release()
# Number of worker threads draining the shared keyword list.
WORKER_COUNT = 100

# Make sure the result dict exists even if no worker ever stores anything;
# otherwise the dump below would fail with NameError.
d = {}

# Set the live-worker counter to its final value *before* any worker starts,
# so the main thread never writes it concurrently with a worker's decrement.
count_thread = WORKER_COUNT
for i in xrange(WORKER_COUNT):
    thread.start_new_thread(some_function, ())

# Wait for every worker to report completion.  Sleeping between checks
# releases the interpreter to the workers instead of spinning at full CPU.
while count_thread > 0:
    time.sleep(0.1)

# Append the collected results as one JSON document.
f = open("yandex-result.txt", 'a+')
try:
    f.write(simplejson.dumps(d))
finally:
    # Close the file even when serialization or the write fails.
    f.close()