# *******************************
# Decorator to wrap task_* method
# *******************************
def integrity(integrity_func):
    """Build a decorator that runs an integrity check before a task handler.

    Args:
        integrity_func: either a callable, invoked as
            ``integrity_func(grab)``, or a string with the name of a method
            on the spider, invoked as ``getattr(self, integrity_func)(grab)``.

    Returns:
        A decorator for handlers with the signature ``(self, grab, task)``.
        The decorated handler is always a generator:

        * if the check raises ``PageNotFound``, ``UnexpectedHttpCode`` or
          ``UnexpectedContent``, the failure is counted in ``self.stat``
          under the exception class name, logged, and ``task.clone()`` is
          yielded; the wrapped handler is not called;
        * otherwise the wrapped handler is called and, when its result is
          not ``None``, every item of that result is re-yielded.

        Any other exception raised by the check propagates to the caller.
    """
    def build_decorator(func):
        @wraps(func)
        def func_wrapper(self, grab, task):
            try:
                if isinstance(integrity_func, str):
                    # A string refers to a method of the spider instance.
                    getattr(self, integrity_func)(grab)
                else:
                    integrity_func(grab)
            except (PageNotFound, UnexpectedHttpCode, UnexpectedContent) as ex:
                # Record the failure BEFORE yielding: code placed after a
                # ``yield`` only runs if the consumer resumes the generator,
                # so the counter and the log entry could otherwise be lost.
                self.stat.inc(ex.__class__.__name__)
                logging.error('Integrity check failed: %s', ex, exc_info=ex)
                yield task.clone()
            else:
                result = func(self, grab, task)
                # Handlers may be plain functions returning None.
                if result is not None:
                    for event in result:
                        yield event
        return func_wrapper
    return build_decorator


# **************** # Example of usage # **************** class SomeSpider(Spider): def basic_integrity(self, grab): if grab.doc.code == 404: raise PageNotFound('HTTP code 404') elif grab.doc.code != 200: import pdb; pdb.set_trace() raise UnexpectedHttpCode('HTTP code: %d' % grab.doc.code) elif not grab.doc.text_search('