# -*- coding:cp1251
import sys
import thread
import pycurl
import StringIO
import re
import urllib
# Fetch the Yandex Wordstat page for one search phrase, scrape the
# (phrase, count) table rows out of the returned HTML and store every row
# whose count exceeds MIN_COUNT in yandex.txt as the repr() of a dict.

# Rows whose numeric column is not strictly greater than this are dropped.
MIN_COUNT = 300

# The phrase is UTF-8 encoded before being URL-quoted into the query string.
params = urllib.urlencode({'text': u'витамин'.encode('utf8')})
url = "http://wordstat.yandex.ru/?%s" % params

# pycurl passes the response body to the write callback; collect it in an
# in-memory buffer.
data = StringIO.StringIO()
curl = pycurl.Curl()
try:
    curl.setopt(pycurl.FOLLOWLOCATION, 0)   # do not follow HTTP redirects
    curl.setopt(pycurl.CONNECTTIMEOUT, 20)  # connect timeout, in seconds
    curl.setopt(pycurl.URL, url)
    curl.setopt(pycurl.WRITEFUNCTION, data.write)
    try:
        curl.perform()
    except pycurl.error as err:
        # Best effort: a failed transfer is reported instead of being
        # silently swallowed, and the script carries on with whatever was
        # received (possibly nothing).  Only transfer errors are caught, so
        # Ctrl-C and programming errors still propagate.
        sys.stderr.write("Wordstat request failed: %s\n" % (err,))
finally:
    # Release the curl handle even if an unexpected exception escapes.
    curl.close()
rez = data.getvalue()

# One match per table row: a <td> holding a link whose href starts with
# "?page" (group 1 = link text), followed by a right-aligned <td> holding
# digits and/or dots (group 2).  re.S lets ".*?" span line breaks.
pattern = re.compile(r'<td>\s+<a href="\?page[^"]+">(.*?)</a>\s+</td>\s+<td align="right">([\d.]+)</td>', re.S)
group = pattern.findall(rez)

# Keep only rows above the threshold.  The count is stored as the original
# string, exactly as it was scraped.
d = dict()
for phrase, count in group:
    # NOTE(review): the pattern admits '.' inside the number, and int() raises
    # ValueError on such a value -- confirm the number format the page uses.
    if int(count) > MIN_COUNT:
        d[phrase] = count

# Write the result; the with-block guarantees the file is flushed and closed.
with open("yandex.txt", 'w') as out:
    out.write(repr(d))