class Whoistory def __init__ self result_file_path None self grab Grab

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
class Whoistory:
    """Crawl whoistory.com archive pages and save the listed names as URLs.

    The crawl runs in three stages: build one archive URL per year/month,
    fetch each of those to collect the per-day page links, then fetch each
    day page and write every name found to the result file as
    ``http://<name>``, one per line.

    The instance owns the result file. Call ``close()`` when finished, or
    use the instance in a ``with`` block.
    """

    # Site root; also prepended to the day-page hrefs, which are presumably
    # site-relative paths (verify against the live markup).
    BASE_URL = 'http://whoistory.com'
    # Archive range that is crawled.
    YEARS = ('2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013')
    MONTHS = ('01', '02', '03', '04', '05', '06',
              '07', '08', '09', '10', '11', '12')

    def __init__(self, result_file_path=None):
        """Create the Grab client and open the result file for writing.

        Args:
            result_file_path: Path of the text file to (over)write.

        Raises:
            TypeError: If ``result_file_path`` is None. ``open()`` would
                raise the same exception type, only with a less helpful
                message and after the Grab client had been built.
        """
        if result_file_path is None:
            raise TypeError('result_file_path must be a file path, not None')
        self.grab = Grab()
        self.result_file = open(result_file_path, 'w')
        self.month_pages_list = None
        self.days_pages_list = None

    def __enter__(self):
        """Return the instance so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the result file; exceptions from the block propagate."""
        self.close()
        return False

    def close(self):
        """Flush and close the result file. Safe to call more than once."""
        self.result_file.close()

    def month_pages(self):
        """Fill ``month_pages_list`` with one URL per YEARS/MONTHS pair."""
        self.month_pages_list = [
            '%s/%s/%s' % (self.BASE_URL, year, month)
            for year in self.YEARS
            for month in self.MONTHS
        ]

    def day_pages(self):
        """Fetch every month page and collect the day-page URLs it links to.

        The result is stored in ``days_pages_list``. If the month list has
        not been built yet, it is built first instead of failing on None.
        """
        if self.month_pages_list is None:
            self.month_pages()
        self.days_pages_list = []
        for url in self.month_pages_list:
            self.grab.go(url)
            hrefs = self.grab.xpath_list('//ul[@class="ul_days"]/li/a/@href')
            self.days_pages_list.extend(self.BASE_URL + href for href in hrefs)

    def parse_domains(self):
        """Run the full crawl and write each found name to the result file.

        Both page lists are rebuilt on every call. The file stays open
        afterwards so the method can be called again, but it is flushed,
        even on failure, so results gathered so far reach the disk.
        """
        self.month_pages()
        self.day_pages()
        try:
            for url in self.days_pages_list:
                self.grab.go(url)
                names = self.grab.xpath_list('//div[@class="left"]/a/text()')
                for elem in names:
                    # Parenthesised single-argument form prints the same
                    # text under both Python 2 and Python 3.
                    print('Find %s' % elem)
                    self.result_file.write('http://%s\n' % elem)
        finally:
            self.result_file.flush()
if __name__ == '__main__':
    # Script entry point: crawl the site and store the results in urls.txt.
    scraper = Whoistory('urls.txt')
    try:
        scraper.parse_domains()
    finally:
        # Close the output file even if the crawl fails part-way, so that
        # everything written so far is flushed to disk.
        scraper.result_file.close()