# *******************************
# Decorator to wrap task_* method
# *******************************
def integrity(integrity_func):
    """
    Build a decorator that runs an integrity check before a task_* method.

    The check is called with the ``grab`` object. If it raises
    PageNotFound, UnexpectedHttpCode or UnexpectedContent, the wrapped
    method is not called: the failure is counted in ``self.stat`` under
    the exception class name, logged, and a clone of the task is yielded
    instead. Otherwise the wrapped method runs and everything it yields
    is passed through.

    Args:
        :param integrity_func: could be a callable accepting ``grab``, or a
            string containing the name of a method on the decorated
            object to call with ``grab``
    """
    def build_decorator(func):
        @wraps(func)
        def func_wrapper(self, grab, task):
            # A callable is used as-is; anything else is treated as the
            # name of a method looked up on the instance at call time.
            if callable(integrity_func):
                check = integrity_func
            else:
                check = getattr(self, integrity_func)

            try:
                check(grab)
            except (PageNotFound, UnexpectedHttpCode, UnexpectedContent) as ex:
                # Record the failure BEFORE yielding: code placed after a
                # yield only runs if the consumer resumes the generator.
                self.stat.inc(ex.__class__.__name__)
                logging.error('Integrity check failed: %s', ex, exc_info=ex)
                yield task.clone()
            else:
                result = func(self, grab, task)
                # The wrapped method may be a plain function returning None
                # rather than a generator.
                if result is not None:
                    for event in result:
                        yield event
        return func_wrapper
    return build_decorator
# ****************
# Example of usage
# ****************
class SomeSpider(Spider):
def basic_integrity(self, grab):
    """
    Check that the fetched document looks like a valid page.

    :param grab: object whose ``doc.code`` and ``doc.text_search`` are
        inspected
    :raises PageNotFound: when the HTTP code is 404
    :raises UnexpectedHttpCode: when the HTTP code is anything else but 200
    :raises UnexpectedContent: when the copyright marker is not found in
        the document
    """
    # No debugger breakpoints here: this runs unattended, and the raised
    # exception already carries the details needed for diagnosis.
    code = grab.doc.code
    if code == 404:
        raise PageNotFound('HTTP code 404')
    if code != 200:
        raise UnexpectedHttpCode('HTTP code: %d' % code)
    if not grab.doc.text_search('<div class="copyright_text">'):
        raise UnexpectedContent('Copyright message not found')
@integrity('basic_integrity')
def task_page(self, grab, task):
# business logic