Grab Spider for App Store iOS applications

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import logging
from grab.spider import Spider, Task
# XPath expressions used by the spider, keyed by what they select.
XPATHS = {
    # Anchors with class "top-level-genre" inside <ul> lists whose class
    # starts with "list"; read by the initial task.
    'list_genres': "//ul[starts-with(@class, 'list')]/li/a[@class='top-level-genre']",
    # The "paginate-more" anchor in the last item of the second
    # "list paginate" <ul>; read by the genre task.
    'next_page': "//ul[@class='list paginate'][2]/li[last()]/a[@class='paginate-more']",
    # Anchors inside the lists under the "selectedcontent" <div>;
    # read by the genre task.
    'list_apps': "//div[@id='selectedcontent']/div/ul/li/a",
}
class AppStoreSpider(Spider):
    """Spider that walks App Store genre listings and prints app links.

    Starting from the iOS genre index, it schedules one ``genre`` task per
    top-level genre link and letter filter, prints the ``href`` of every
    application link found on each listing page, and follows the
    "next page" link for as long as one is present.
    """

    initial_urls = ['https://itunes.apple.com/ru/genre/ios/id36?mt=8']

    def task_initial(self, grab, task):
        """Schedule a ``genre`` task for every genre/letter combination.

        Yields:
            Task: one ``genre`` task per genre link selected by
            ``XPATHS['list_genres']`` and per letter from ``get_letters()``.
        """
        letters = self.get_letters()
        for elem in grab.doc.select(XPATHS['list_genres']):
            # Plain concatenation rather than str.format(): the href comes
            # from the page, and a stray '{' or '}' in it would make
            # format() raise or substitute in the wrong place.
            base_url = elem.attr('href') + '&letter='
            for letter in letters:
                yield Task('genre', url=base_url + letter)

    def task_genre(self, grab, task):
        """Print the app links on a listing page and follow pagination.

        Yields:
            Task: a ``genre`` task for the next page, when the page has a
            link matching ``XPATHS['next_page']``.
        """
        for app in grab.doc.select(XPATHS['list_apps']):
            print(app.attr('href'))

        # The original called exit() once apps were found, which ended the
        # process before the next-page link below was ever examined.
        next_page = grab.doc.select(XPATHS['next_page'])
        if next_page.exists():
            yield Task('genre', url=next_page.attr('href'))

    @staticmethod
    def char_range(start, end):
        """Yield each character from ``start`` to ``end``, both inclusive."""
        for code in range(ord(start), ord(end) + 1):
            yield chr(code)

    @staticmethod
    def get_letters():
        """Return the letter filters: 'A' through 'Z' followed by '*'."""
        return list(AppStoreSpider.char_range('A', 'Z')) + ['*']
if __name__ == '__main__':
    # Script entry point: enable debug-level logging, then run the spider
    # with thread_number=10.
    logging.basicConfig(level=logging.DEBUG)
    spider = AppStoreSpider(thread_number=10)
    spider.run()