import urllib import re from BeautifulSoup import BeautifulSoup RE_NUM

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
import urllib
import re
from BeautifulSoup import BeautifulSoup
# Matches the first run of decimal digits in a piece of text; used below to
# pull the numeric score out of the text of a <span>.
RE_NUMBER = re.compile(r'\d+')

# Download the page at news.ycombinator.com and parse it into a tree.
data = urllib.urlopen('http://news.ycombinator.com').read()
soup = BeautifulSoup(data)

# Walk every <td class="title">. Cells that contain a link are treated as
# story titles; the score is looked up in the first <span> of the node that
# directly follows the title's parent row.
for td in soup.findAll('td', 'title'):
    if not td.a:
        # Title cell without a link: nothing to report.
        continue
    title = td.a.string

    tr = td.parent.nextSibling
    if not tr:
        # No node follows the title row, so there is nowhere to find a score.
        continue

    # Any of the following lookups can come back empty (no <span>, a <span>
    # without a single text child, or text containing no digits). Skip such
    # rows instead of crashing the whole run with AttributeError/TypeError.
    span = tr.find('span')
    if span is None:
        continue
    score_text = getattr(span, 'string', None)
    if score_text is None:
        continue
    match = RE_NUMBER.search(score_text)
    if match is None:
        continue
    score = match.group(0)

    # Single pre-formatted argument: prints "<title> / <score>" exactly like
    # the statement form `print title, '/', score` did.
    print('%s / %s' % (title, score))