#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Print the links found on the monthly boston.com ``bigpicture`` pages."""

import re
import sys
import urllib2
from datetime import date


def get_all_issues():
    """Scan every monthly archive page and print each matched link.

    One page is requested per month, from January 2008 up to and including
    the current month. Every line of a page is searched with the ``rawstr``
    pattern (case-insensitive, DOTALL); for each line that matches, the
    pattern's ``link`` group is printed to stdout on its own line.

    HTTP 404 answers are skipped silently, any other HTTP error status is
    reported on stderr and that month is skipped. Errors other than
    ``urllib2.HTTPError`` are not handled here and propagate to the caller.
    """
    today = date.today()
    # NOTE(review): as written, this pattern is a single newline and defines
    # no ``link`` group, so ``group('link')`` below raises IndexError on the
    # first line that matches. The original expression looks like it was
    # lost -- restore it before relying on this script.
    rawstr = r"""
"""
    # Compile once instead of re-resolving the pattern for every line.
    link_re = re.compile(rawstr, re.IGNORECASE | re.DOTALL)

    for year in range(2008, today.year + 1):
        # Months after the current one have not happened yet, so there is
        # nothing to fetch for them.
        last_month = today.month if year == today.year else 12
        for month in range(1, last_month + 1):
            target_url = "http://www.boston.com/bigpicture/%s/%s/" % (
                year, str(month).zfill(2))

            # Only urlopen() is guarded: it is the call that raises HTTPError.
            try:
                response = urllib2.urlopen(target_url)
            except urllib2.HTTPError as err:
                # Presumably a month without an archive page answers 404;
                # that is expected and skipped quietly. Other statuses are
                # still skipped, but no longer hidden.
                if err.code != 404:
                    sys.stderr.write(
                        "skipping %s: HTTP %s\n" % (target_url, err.code))
                continue

            # try/finally rather than ``with``: the urllib2 response object
            # is not a context manager on Python 2.
            try:
                for line in response.readlines():
                    match_obj = link_re.search(line)
                    if match_obj:
                        print(match_obj.group('link'))
            finally:
                response.close()


if __name__ == "__main__":
    get_all_issues()
    print("done")