"""Print the title and score of each entry found on news.ycombinator.com.

The script downloads the page, walks every ``<td class="title">`` cell that
contains a link, and looks in the table row that follows for the first
``<span>``; the first run of digits in that span is printed as the score.
Entries for which no score can be extracted are skipped rather than
aborting the whole run.

NOTE(review): the "title cell followed by a row whose first span holds the
score" layout is inferred from the lookups below -- confirm against the
live markup.
"""
import re
import urllib

from BeautifulSoup import BeautifulSoup

# Raw string so that ``\d`` is unambiguously a regex escape.
RE_NUMBER = re.compile(r'\d+')

# Fetch the page and always release the connection, even if read() fails.
response = urllib.urlopen('http://news.ycombinator.com')
try:
    data = response.read()
finally:
    response.close()

soup = BeautifulSoup(data)

for td in soup.findAll('td', 'title'):
    # Title cells without a link carry nothing to report.
    if td.a is None:
        continue
    title = td.a.string

    # Ask for the next <tr> explicitly: a plain ``nextSibling`` may be a
    # whitespace text node, which has no usable ``find('span')``.
    tr = td.parent.findNextSibling('tr')
    if tr is None:
        continue

    # Any link in this chain may be missing (no span, a span without a
    # single string child, or text without digits); skip such entries
    # instead of crashing on ``None``.
    span = tr.find('span')
    text = span.string if span is not None else None
    match = RE_NUMBER.search(text) if text else None
    if match is None:
        continue

    score = match.group(0)
    # Single-argument form prints "title / score" exactly as the original
    # comma-separated print statement did.
    print('%s / %s' % (title, score))