bin env python coding utf-8 import urllib re ClientCookie socket ZODIA

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
#!/bin/env python
# -*- coding: utf-8 -*-
import urllib, re, ClientCookie#, socket
# Sign name sent to the bot after the "horo" command (Russian for "Gemini").
ZODIAC = u"Близнецы"

# Number of daily horoscope entries extracted from the bot's reply.
DAYS = 4

# Browser identification sent with every HTTP request.
USER_AGENT = (
    "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) "
    "Gecko/2008070206 Firefox/3.0.1"
)

# Disabled together with the socket import and the setdefaulttimeout() call:
#TIMEOUT = 5
def _read_cp1251(response):
    """Read an open HTTP response to the end, close it, and decode it as cp1251."""
    try:
        return unicode(response.read(), 'cp1251')
    finally:
        # Release the connection even if reading or decoding fails.
        response.close()


def fetch_page(zodiac):
    """Request a horoscope from the icqbot.org web interface.

    Loads the landing page, extracts the hidden ``ibwiSession`` value and the
    iframe URI from it, then submits ``horo <zodiac>`` to that iframe URI and
    returns the decoded reply.

    Note: this code targets Python 2 (``unicode``, ``urllib.urlencode``).

    Args:
        zodiac: Sign name interpolated into the ``horo %s`` command; the
            script's entry point passes it already encoded as cp1251 bytes.

    Returns:
        The reply page as a unicode string decoded from cp1251.

    Raises:
        AttributeError: If the landing page contains no ``ibwiSession`` input
            or no ``<iframe src=...>`` (``re.search`` returns ``None``).

    Side effects:
        Installs a cookie-aware opener with the custom User-Agent through
        ``ClientCookie.install_opener``; it stays installed after the call.
    """
    cookie_jar = ClientCookie.CookieJar()
    opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cookie_jar))
    opener.addheaders = [("User-Agent", USER_AGENT)]
    ClientCookie.install_opener(opener)

    page = _read_cp1251(ClientCookie.urlopen('http://www.icqbot.org/web'))

    # The leading greedy ".*" combined with re.S means that, when the page
    # contains several candidates, the LAST one in the page is captured.
    session_id = re.search(r'.*<input type="hidden" name="ibwiSession" value="(.*?)".*', page, re.S).group(1)
    frame_uri = re.search(r'.*<iframe src="(.*?)".*', page, re.S).group(1)

    form_data = urllib.urlencode({
        'ibwiInput': 'horo %s' % zodiac,
        'ibwiSession': session_id,
    })
    return _read_cp1251(ClientCookie.urlopen('http://www.icqbot.org%s' % frame_uri, form_data))
def parse_page(page, days=None):
    """Extract the horoscope header and the per-day entries from a reply page.

    The page is expected to contain a header followed by `` ~<br />`` and
    then ``days`` blocks, each made of a date line and a text line separated
    by ``<br />`` tags and CRLF line breaks.

    Args:
        page: Reply page text as returned by ``fetch_page``.
        days: Number of (date, text) entries to extract. Defaults to the
            module-level ``DAYS`` constant when omitted.

    Returns:
        A string of the form ``"<header>:"`` on the first line, followed by
        each entry as ``"<date>\\n<text>"``, with a blank line between entries.

    Raises:
        ValueError: If ``days`` is less than 1.
        AttributeError: If the page does not match the expected layout
            (``re.search`` returns ``None``).
    """
    if days is None:
        days = DAYS
    if days < 1:
        raise ValueError('days must be at least 1, got %r' % (days,))

    # One day's worth of markup: an empty line, the date line, the text line.
    day_entry = r'<br />\r\n(.*?)<br />\r\n(.*?)<br />'
    pattern = (
        r'.* (.*?) ~<br />\r\n'
        # Every entry except the last is terminated by a CRLF ...
        + (day_entry + r'\r\n') * (days - 1)
        # ... while the last one may be followed by anything.
        + day_entry + r'.*'
    )

    groups = re.search(pattern, page, re.S).groups()
    header, fields = groups[0], groups[1:]

    # fields is a flat (date1, text1, date2, text2, ...) sequence; pair it up
    # instead of unpacking into a fixed number of names so any `days` works.
    entries = [
        '%s\n%s' % (fields[i], fields[i + 1])
        for i in range(0, len(fields), 2)
    ]
    return '%s:\n%s' % (header, '\n\n'.join(entries))
if __name__ == '__main__':
    # Network timeout is currently disabled; re-enable it together with the
    # socket import and the TIMEOUT constant.
    #socket.setdefaulttimeout(TIMEOUT)

    # The sign name is encoded as cp1251 before being sent to the bot.
    encoded_sign = ZODIAC.encode('cp1251')
    reply_page = fetch_page(encoded_sign)
    print(parse_page(reply_page))