wordstat parser

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
def wordstat(request):
    """Render the Wordstat search page, including scraped results on POST.

    On POST, the submitted ``keyword`` field is sent as the ``text`` query
    parameter to ``http://wordstat.yandex.ru/advq``. Every
    ``<tr class="tlist">`` row of the returned HTML that matches the expected
    markup is turned into a dict with two string values: ``'keyword'`` (the
    link text) and ``'impressions'`` (the digits of the right-aligned cell).
    Rows that do not match are skipped. For any other request method an
    unbound form and an empty result list are rendered.

    Args:
        request: the incoming Django request object.

    Returns:
        Whatever ``render_to_response`` produces for ``wordstat.html`` with
        ``searchform`` and ``results`` in the template context.
    """
    results = []

    if request.method != 'POST':
        return render_to_response(
            'wordstat.html',
            {'searchform': WordstatSearchForm(), 'results': results})

    searchform = WordstatSearchForm(request.POST)

    # A missing field must not raise; an absent or empty keyword means there
    # is nothing to look up, so the remote request is skipped entirely.
    key = request.POST.get('keyword', '')
    if key:
        # Python 2's urllib.urlencode calls str() on every value, which
        # raises UnicodeEncodeError for non-ASCII unicode input. Encode the
        # keyword to UTF-8 bytes up front.
        if isinstance(key, unicode):
            key = key.encode('utf-8')

        data = [
            ("rpt", "ppc"),
            ("key", ""),
            ("shw", "1"),
            ("tm", ""),
            ("checkboxes", ""),
            ("text", key),
            ("regions_text", "Все"),
            ("regions", ""),
        ]
        enc_data = urllib.urlencode(data)

        # Always release the connection, even if reading fails.
        f = urllib.urlopen("http://wordstat.yandex.ru/advq" + "?" + enc_data)
        try:
            doc = f.read()
        finally:
            f.close()

        soup = BeautifulSoup.BeautifulSoup(doc)
        rows = soup.findAll('tr', {"class": "tlist"})

        # Group 3 is the keyword text inside the link, group 5 the
        # impression count.
        row_re = re.compile(r'^<tr class="tlist" bgcolor="([\w]+)"><td><a href="([\S]+)">([\S\s]+)</a></td>([\s]?)<td align="right">([\d]+)</td></tr>$')

        for row in rows:
            match = row_re.search(unicode(row))
            if match is None:
                # The pattern is strict and anchored; skip rows whose markup
                # differs instead of failing on a None match.
                continue
            results.append({
                'keyword': match.group(3),
                'impressions': match.group(5),
            })

    return render_to_response(
        'wordstat.html',
        {'searchform': searchform, 'results': results})