coding cp1251 import sys import thread import pycurl import StringIO i

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# -*- coding: cp1251 -*-
import sys
import thread
import pycurl
import StringIO
import re
import urllib
# Fetch the wordstat page for a fixed search term, pull (phrase, count) rows
# out of the HTML with a regex, keep the rows whose count exceeds 300 and
# dump the resulting dict to "yandex.txt" as its repr().

# Build the query string; the search term is sent UTF-8 encoded.
params = urllib.urlencode({'text': u'витамин'.encode('utf8')})
url = "http://wordstat.yandex.ru/?%s" % params

# Download the response body into an in-memory buffer.
data = StringIO.StringIO()
curl = pycurl.Curl()
try:
    curl.setopt(pycurl.FOLLOWLOCATION, 0)
    curl.setopt(pycurl.CONNECTTIMEOUT, 20)
    curl.setopt(pycurl.URL, url)
    curl.setopt(pycurl.WRITEFUNCTION, data.write)
    try:
        curl.perform()
    except pycurl.error:
        # Best effort, as before: a failed transfer does not abort the
        # script, and whatever was received is still parsed below. The
        # failure is reported instead of being silently discarded, and
        # unrelated exceptions (e.g. KeyboardInterrupt) now propagate.
        sys.stderr.write("request to %s failed: %s\n"
                         % (url, sys.exc_info()[1]))
finally:
    # Release the handle even when setopt()/perform() raises.
    curl.close()
rez = data.getvalue()

# Each match is a table cell holding a "?page..." link (group 1: link text)
# followed by a right-aligned cell (group 2: digits and dots).
pattern = re.compile(r'<td>\s+<a href="\?page[^"]+">(.*?)</a>\s+</td>\s+<td align="right">([\d.]+)</td>', re.S)
group = pattern.findall(rez)

# Keep only the rows whose count is above 300; the count is stored as the
# original string, not as an int.
d = dict()
for a, b in group:
    # NOTE(review): the pattern admits '.' inside the count, and int() raises
    # ValueError on such a value - confirm whether dots can actually occur.
    if int(b) > 300:
        d[a] = b

# Write the result and make sure the file handle is closed.
out = open("yandex.txt", 'w')
try:
    out.write(repr(d))
finally:
    out.close()