import re

from grab.error import DataNotFound
from grab.spider import Task

# CHECKED, AVAILABLE and the InnerItem model are project-specific and
# assumed to be imported from elsewhere in the project.


def task_parse_items(self, grab, task):
    """Parse one category page of a shop and schedule the next page."""
    print grab.response.code
    items = grab.doc.select(task.shop.xpath_items)
    print task.shop.description.encode('utf8') + ' / ' \
        + task.category.name.encode('utf8') + ' / ' \
        + task.url.encode('utf8')
    for item in items:
        # Extract the price with the shop-specific regexp; fall back to
        # None when the price node is missing or the regexp does not match.
        try:
            last_price_re = re.match(
                task.shop.re_price,
                item.select(task.shop.xpath_item_price_relative)
                    .text().encode('utf8').strip())
            last_price = last_price_re.group() if last_price_re else None
        except DataNotFound:
            last_price = None
        data = {
            'name': item.select(task.shop.xpath_item_name_relative)
                        .text().encode('utf8').strip(),
            'description': item.select(task.shop.xpath_item_description_relative)
                               .text().encode('utf8').strip(),
            'link': item.select(task.shop.xpath_item_link_relative)
                        .text().encode('utf8').strip(),
            'shop_id': task.shop.id,
            'category_id': task.category.id,
            'last_price': last_price,
            'checked': CHECKED,
            'available': AVAILABLE,
        }
        self.save(data, InnerItem, task.session)
    # Queue the next page if the pagination link is present. Grab's
    # DataNotFound subclasses IndexError, so this also covers an empty
    # selection.
    try:
        next_url = grab.doc.select(task.shop.xpath_next_page).text()
    except IndexError:
        pass
    else:
        yield Task(
            'parse_items',
            url=grab.make_url_absolute(next_url, resolve_base=True),
            category=task.category,
            shop=task.shop,
            session=task.session)
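
For context, here is a minimal sketch of how such a handler is typically wired into a Grab Spider subclass. The handler reads shop, category and session off the task object, so the initial tasks have to carry them; ShopSpider, Shop, Category and Session below are hypothetical stand-ins for the project's own objects, not names taken from the original code:

from grab.spider import Spider, Task


class ShopSpider(Spider):
    # task_parse_items from above would be defined on this class.

    def task_generator(self):
        # Session(), Shop and Category are placeholders for the project's
        # (likely SQLAlchemy-style) models.
        session = Session()
        for shop in session.query(Shop):
            for category in shop.categories:
                # Extra keyword arguments passed to Task are stored as
                # attributes on the task object, which is what lets the
                # handler read them back as task.shop, task.category
                # and task.session.
                yield Task(
                    'parse_items',
                    url=category.url,
                    shop=shop,
                    category=category,
                    session=session)

Running it follows the usual Grab pattern: bot = ShopSpider(thread_number=4); bot.run(). Each 'parse_items' response is then dispatched to task_parse_items, which re-yields a Task for the next page until the pagination link disappears.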