import settings import logging from datetime import datetime timedelta

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import settings
import logging
from datetime import datetime, timedelta
import os
import urllib
from urlparse import urljoin
from grab import Grab
import settings
from seo.captcha.util import solve_captcha
def setup_grab():
    """
    Build a Grab instance configured to use the cached proxy list.

    Calls ``load_proxy()`` first, then points the new Grab object at
    ``settings.PROXY_CACHE_FILE`` with ``settings.PROXY_TYPE``. When the
    optional ``settings.PROXY_USERPWD`` is set to a truthy value it is
    applied as the proxy credentials.

    Returns the configured Grab object.
    """

    load_proxy()

    grab = Grab()
    grab.setup(
        log_dir='var',
        proxy_file=settings.PROXY_CACHE_FILE,
        proxy_type=settings.PROXY_TYPE,
    )

    # PROXY_USERPWD is optional: it may be missing from settings or empty.
    userpwd = getattr(settings, 'PROXY_USERPWD', None)
    if userpwd:
        grab.setup(proxy_userpwd=userpwd)

    return grab
def load_proxy():
    """
    Refresh the local proxy list cache when it is missing or stale.

    The cache file ``settings.PROXY_CACHE_FILE`` is rewritten when it does
    not exist or when its modification time is older than
    ``settings.PROXY_CACHE_TIMEOUT`` seconds. The new content is downloaded
    from ``settings.PROXY_URL``; only lines containing ``:`` are kept, one
    per line, with surrounding whitespace stripped.

    Raises:
        Exception: if ``settings.PROXY_URL`` is empty.
    """

    if not settings.PROXY_URL:
        raise Exception('PROXY_URL is not configured')

    # A single stat() call answers both "does the file exist" and "how old
    # is it", which avoids the race between a separate exists() and stat().
    try:
        mtime = datetime.fromtimestamp(
            os.stat(settings.PROXY_CACHE_FILE).st_mtime)
    except OSError:
        needs_update = True
    else:
        check_time = datetime.now() - timedelta(
            seconds=settings.PROXY_CACHE_TIMEOUT)
        needs_update = mtime < check_time

    if not needs_update:
        return

    logging.debug('Refreshing proxy cache from %s', settings.PROXY_URL)

    # The object returned by urllib.urlopen is closed explicitly with
    # try/finally so the connection is released even if reading fails.
    response = urllib.urlopen(settings.PROXY_URL)
    try:
        stripped = (raw.strip() for raw in response)
        list_ = [line for line in stripped if ':' in line]
    finally:
        response.close()

    # The download is complete before the cache is opened for writing, so a
    # failed download never truncates the existing cache. The ``with`` block
    # guarantees the data is flushed and the handle closed.
    with open(settings.PROXY_CACHE_FILE, 'w') as cache:
        cache.write('\n'.join(list_))

    logging.debug('Found %d proxy servers in remote list', len(list_))
def crack_captcha(g):
    """
    Download the captcha image of the current page and have it solved.

    Looks up the ``img`` element with class ``captcha`` through ``g.soup``,
    resolves its ``src`` against ``http://digg.com/``, requests that URL
    with ``g`` and passes the raw response body to ``solve_captcha`` for
    the service named in ``settings.CAPTCHA_SERVICE``.

    Args:
        g: object used for the lookup and the request; presumably a Grab
            instance holding the page with the captcha (verify against
            callers).

    Returns:
        Whatever ``solve_captcha`` returns for the downloaded image.

    Raises:
        Exception: if ``settings.CAPTCHA_SERVICE`` is neither
            ``'anticaptcha'`` nor ``'decaptcha'``. The image has already
            been requested by the time this is raised.
    """

    captcha_img = g.soup.find('img', 'captcha')
    g.setup(url=urljoin('http://digg.com/', captcha_img['src']))
    g.request()
    data = g.original_response_body

    # Only the credentials differ between the supported services.
    service = settings.CAPTCHA_SERVICE
    if service == 'anticaptcha':
        credentials = {'key': settings.ANTICAPTCHA_KEY}
    elif service == 'decaptcha':
        credentials = {
            'key': settings.DECAPTCHA_KEY,
            'password': settings.DECAPTCHA_PASSWORD,
        }
    else:
        raise Exception('Unknown captcha service: %s' % service)

    return solve_captcha(
        service,
        timeout=settings.CAPTCHA_TIMEOUT,
        number=2,
        rawimage=data,
        **credentials
    )