coding utf-8 from grab spider import Spider Task inline_task from grab

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# coding: utf-8
from grab.spider import Spider, Task, inline_task
from grab.error import DataNotFound
import logging
# Configure the root logger so that records of level DEBUG and above are emitted.
logging.basicConfig(level=logging.DEBUG)
class RegSpider(Spider):
    """Walk the forum member list and print every member's nickname.

    Starting from ``memberlist.php``, the spider follows the ``rel="next"``
    pagination link from one listing page to the next, schedules one
    ``member`` task per profile link found on each listing page, and prints
    the text of the ``<h1>`` element of every profile page that has one.
    """

    # Prefix prepended to every href scraped from the site
    # (assumes the site emits hrefs relative to this root -- TODO confirm).
    SITE_ROOT = 'http://www.gofuckbiz.com/'
    # Pagination link that points at the following member-list page.
    NEXT_PAGE_XPATH = '//a[@rel="next"]/@href'
    # Profile links found on a member-list page.
    MEMBER_LINK_XPATH = '//td[@class="alt1Active"]/a/@href'

    def task_generator(self):
        """Yield the initial ``index`` task for the first member-list page."""
        grab = self.create_grab_instance()
        grab.setup(url=self.SITE_ROOT + 'memberlist.php')
        yield Task('index', grab=grab)

    @inline_task
    def task_index(self, grab, task):
        """Queue a ``member`` task per profile link, page after page.

        ``grab`` holds the response of the listing page currently being
        processed.  The profile links of that page are queued first, and only
        then is the "next" link followed, so the very first listing page is
        covered as well.  The loop ends when a page has no "next" link.

        ``task`` is part of the handler signature and is not used here.
        """
        while True:
            # Schedule every profile linked from the current listing page.
            # Only the request URL is changed by setup(); the already parsed
            # ``grab.doc`` is still read by the pagination lookup below.
            for link in grab.doc.select(self.MEMBER_LINK_XPATH):
                grab.setup(url=self.SITE_ROOT + link.text())
                self.add_task(Task('member', grab=grab))

            try:
                href = grab.doc.select(self.NEXT_PAGE_XPATH).text()
            except DataNotFound:
                # No "next" link: this was the last listing page.
                break

            grab.setup(url=self.SITE_ROOT + href)
            # The value of the yield expression replaces ``grab``; with
            # @inline_task this is expected to be the Grab object holding
            # the response of the yielded task (see grab's documentation).
            grab = yield Task(grab=grab)

    def task_member(self, grab, task):
        """Print the text of the profile page's ``<h1>``, if it is non-empty."""
        # '' is passed as the default so a page without <h1> yields an empty
        # string here instead of an exception.
        nick = grab.doc.select('//h1').text('')
        if nick:
            # Parenthesised form prints the same on Python 2 and Python 3.
            print(nick)
# Script entry point: build the spider with thread_number=25 (presumably the
# number of concurrent worker threads -- confirm against grab's Spider docs)
# and start the crawl.
bot = RegSpider(thread_number=25)
bot.run()