# Decorator to wrap task_* methods: integrity(integrity_func)

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# *******************************
# Decorator to wrap task_* method
# *******************************
def integrity(integrity_func):
    """Build a decorator that runs an integrity check before a task handler.

    The decorated handler is replaced by a generator function with the same
    ``(self, grab, task)`` signature.  On each call the check runs first:

    * If the check raises ``PageNotFound``, ``UnexpectedHttpCode`` or
      ``UnexpectedContent``, the failure is counted via ``self.stat.inc``
      under the exception's class name, logged with its traceback, and
      ``task.clone()`` is yielded; the wrapped handler is not called.
    * Otherwise the wrapped handler is called and every item it produces is
      re-yielded.  A handler that returns ``None`` produces nothing.

    Any other exception raised by the check propagates unchanged.

    Args:
        integrity_func: either a callable that accepts ``grab``, or a string
            holding the name of a method on the handler's instance; the
            method is looked up with ``getattr`` on every call.

    Returns:
        A decorator to apply to ``(self, grab, task)`` handler methods.
    """
    def build_decorator(func):
        @wraps(func)
        def func_wrapper(self, grab, task):
            try:
                if isinstance(integrity_func, str):
                    # Resolve by name at call time so the check can be a
                    # method of the instance the handler is bound to.
                    getattr(self, integrity_func)(grab)
                else:
                    integrity_func(grab)
            except (PageNotFound, UnexpectedHttpCode, UnexpectedContent) as ex:
                # Count and log BEFORE yielding: this is a generator, so any
                # statement placed after the yield only runs if the consumer
                # asks for a further item.
                self.stat.inc(ex.__class__.__name__)
                logging.error('Integrity check failed: %s', ex, exc_info=ex)
                # NOTE(review): presumably the consumer re-queues the cloned
                # task as a retry -- confirm against the spider framework.
                yield task.clone()
            else:
                result = func(self, grab, task)
                # Plain (non-generator) handlers return None: nothing to
                # forward in that case.
                if result is not None:
                    for event in result:
                        yield event
        return func_wrapper
    return build_decorator
# ****************
# Example of usage
# ****************
class SomeSpider(Spider):
def basic_integrity(self, grab):
    """Check the fetched document's HTTP code and expected page marker.

    Returns ``None`` when the response code is 200 and the copyright
    ``<div>`` marker is found in the document.

    Args:
        grab: object whose ``doc`` exposes ``code`` and ``text_search()``.

    Raises:
        PageNotFound: the response code is 404.
        UnexpectedHttpCode: the response code is anything else but 200.
        UnexpectedContent: the code is 200 but the marker is missing.
    """
    code = grab.doc.code
    if code == 404:
        raise PageNotFound('HTTP code 404')
    if code != 200:
        raise UnexpectedHttpCode('HTTP code: %d' % code)
    # A 200 response can still be a stub or error page; require a marker
    # that the example expects on every valid page.
    if not grab.doc.text_search('<div class="copyright_text">'):
        raise UnexpectedContent('Copyright message not found')
@integrity('basic_integrity')
def task_page(self, grab, task):
# business logic