import re import urllib from yos boss import ysearch from yos yql impo

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import re
import urllib
from yos.boss import ysearch
from yos.yql import db
# Host names of the form "conference.<label>.[<label>. ...]<tld>", lowercase only.
# Written as a raw string so the regex backslashes are never treated as
# string-literal escapes.
RE_CONFERENCE = re.compile(r'conference\.(?:[-a-z0-9]+\.)+[a-z]+')
#START_URL = 'http://www.jabberes.org/servers/beta/servers_by_times_online.html'
def load_list(fname):
    """Return the lines of the text file ``fname`` as a list of strings.

    Line terminators are stripped; an empty file yields an empty list.
    Errors raised while opening or reading the file propagate to the caller.
    """
    # The context manager closes the handle as soon as the read finishes
    # instead of leaving that to garbage collection.
    with open(fname) as source:
        return source.read().splitlines()
def save_list(fname, items):
    """Write ``items`` to the text file ``fname``, one item per line.

    Any previous content of the file is replaced. Items are joined with a
    newline character and no trailing newline is written, so an empty
    ``items`` produces an empty file.
    """
    # Closing through the context manager guarantees the data is flushed
    # to disk before the function returns.
    with open(fname, 'w') as target:
        target.write('\n'.join(items))
def find_servers(url):
    """Download ``url`` and return the conference host names found in it.

    Every match of RE_CONFERENCE in the downloaded data is returned, in
    order of appearance and with duplicates kept. If fetching fails for any
    reason the exception is printed and an empty list is returned
    (best-effort behaviour).
    """
    try:
        response = urllib.urlopen(url)
        try:
            data = response.read()
        finally:
            # Release the connection even when reading fails part-way.
            response.close()
    except Exception as ex:
        print(ex)
        return []
    return RE_CONFERENCE.findall(data)
def find_lists(server):
    """Search for pages mentioning ``server`` and return their URLs.

    The server name is wrapped in double quotes and passed to
    ``ysearch.search`` with ``count=50``; the response is loaded into a
    table via ``db.create`` and the ``'url'`` field of each row is
    collected. Any exception along the way is printed and an empty list is
    returned instead.
    """
    try:
        response = ysearch.search('"%s"' % server, count=50)
        result_table = db.create(data=response)
        return [row['url'] for row in result_table.rows]
    except Exception as ex:
        print(ex)
        return []
def save_lists():
    """Write the three module-level crawl lists to their text files.

    Saves ``servers``, ``urls`` and ``processed_urls`` (in that order) with
    ``save_list``.
    """
    snapshots = (
        ('server_list.txt', servers),
        ('url_list.txt', urls),
        ('processed_url_list.txt', processed_urls),
    )
    for fname, items in snapshots:
        save_list(fname, items)
# Crawl state. These names are module globals because save_lists() reads them.
servers = load_list('server_list.txt')
urls = load_list('url_list.txt')
processed_urls = load_list('processed_url_list.txt')

# Process queued list pages until the queue is exhausted. Looping on the
# queue itself (rather than ``while True``) ends the crawl cleanly instead of
# raising IndexError when no URLs are left.
while urls:
    # Peek rather than pop: the URL stays at the head of the queue until it
    # has been fully handled. That way an intermediate save never loses it,
    # and the "not in urls" check below keeps it from being queued again
    # when a search result points back at the page being processed.
    url = urls[0]
    print('processing list: %s' % url)
    items = find_servers(url)
    print('found %d servers in %s list' % (len(items), url))
    for item in items:
        if item in servers:
            continue
        print('Found new server: %s' % item)
        servers.append(item)
        # Each newly discovered server is used as a search query to find
        # more pages that may list further servers.
        lists = find_lists(item)
        print('Find %d lists on "%s" query' % (len(lists), item))
        for item2 in lists:
            if item2 not in urls and item2 not in processed_urls:
                print('Found new list url: %s' % item2)
                urls.append(item2)
        # Persist progress after every new server so a long page does not
        # have to be searched again from scratch after an interruption.
        save_lists()
    # New URLs are only ever appended, so the current one is still first.
    urls.pop(0)
    processed_urls.append(url)
    # Save once more so the processed marker reaches disk with the queue.
    save_lists()