from grab spider import Task Spider from project database import db cl

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
from grab.spider import Task, Spider
from project.database import db
class VkSpider(Spider):
    """Spider that walks the vk.com catalog and stores the users it lists.

    Links whose ``href`` contains ``?selection`` are followed as further
    catalog pages; every other link is treated as a user entry and saved
    through ``db.user``.
    """

    def task_generator(self):
        """Yield the initial task pointing at the catalog root page."""
        yield Task('page', url='https://vk.com/catalog.php')

    def task_page(self, grab, task):
        """Handle one fetched catalog page.

        Args:
            grab: Object holding the fetched page; used for the XPath query
                and for turning relative links into absolute URLs.
            task: The task that produced this page (not used here).

        Yields:
            A new ``page`` task for every link containing ``?selection``.
        """
        for elem in grab.doc('//div[@id="content"]/div/a'):
            url = elem.attr('href')

            if '?selection' in url:
                # Intermediate catalog page: schedule it for crawling.
                yield Task('page', url=grab.make_url_absolute(url))
                self.stat.inc('int-page')
                continue

            # The user name is whatever follows the first " (" in the link
            # text. partition() never raises, so a link without that marker
            # is counted and skipped instead of aborting the whole page with
            # an IndexError.
            _, marker, tail = elem.text().partition(' (')
            if not marker:
                self.stat.inc('user-malformed')
                continue

            user = {
                '_id': url,
                'name': tail.strip('()').strip(),
            }
            db.user.save(user)
            # Count saved users under an explicit key, matching the keyed
            # 'int-page' counter used for catalog pages.
            self.stat.inc('user')