class Xml(Auth):
    """Handler that forwards a search query to a Yandex XML endpoint.

    The upstream XML answer is converted into a JSON document with the keys
    ``count`` (``-1`` when nothing could be extracted), ``serp`` (a list of
    result entries) and, when the ``debug`` argument is present, ``xml``
    (the raw upstream body).

    Two upstream accounts are read from
    ``settings['config']['yandex_xml']``: ``primary`` and ``reserved``.
    """

    # True when the most recent getXmlSettings() call selected the primary
    # account; consulted when deciding whether an upstream error may be
    # retried on the reserved account.
    _primary = True

    def setTimeout(self, timeout):
        """Select the reserved account for the next ``timeout`` seconds."""
        self.settings['state']['xml_timeout'] = time.time() + timeout

    def getXmlSettings(self):
        """Return the account settings to use right now.

        The reserved account is returned while the deadline stored by
        setTimeout() lies in the future, the primary one otherwise.
        ``self._primary`` is updated to record which one was chosen.
        """
        self._primary = self.settings['state']['xml_timeout'] <= time.time()
        section = 'primary' if self._primary else 'reserved'
        return self.settings['config']['yandex_xml'][section]

    def _xmlReply(self, data):
        """Write ``data`` as JSON to the client and finish the request."""
        self.write(json.dumps(data, indent=4, ensure_ascii=False, encoding='utf8'))
        self.finish()

    @staticmethod
    def _xmlParseBody(body):
        """Parse the upstream body; return the root element or None.

        None is returned for an empty/missing body and for a body that is
        not well-formed XML, so that the caller can still answer the client
        instead of failing inside the fetch callback.
        """
        if not body:
            return None
        try:
            return ET.fromstring(body)
        except ET.ParseError:
            return None

    @staticmethod
    def _xmlErrorCode(root):
        """Return the integer ``code`` of ``<response><error>``, or None.

        None means there is no error element, or its code attribute is
        missing or not numeric.
        """
        reply = root.find('response')
        if reply is None:
            return None
        error = reply.find('error')
        if error is None:
            return None
        try:
            return int(error.attrib['code'])
        except (KeyError, ValueError):
            return None

    @staticmethod
    def _xmlCollectResults(root, data):
        """Fill ``data['count']`` and ``data['serp']`` from the result groups.

        Parsing is best effort: the first group (or container) that lacks an
        expected element ends the walk, and whatever was collected up to
        that point is kept.
        """
        try:
            groups = root.find('response').find('results').find('grouping')
            # NOTE(review): this stores the element text (a string), while
            # the -1 / 0 values set elsewhere are ints; kept as-is because
            # clients may depend on the current output.
            data['count'] = groups.find('found').text
            for position, group in enumerate(groups.findall('group'), 1):
                entry = group.find('doc')
                url = entry.find('url').text
                host = entry.find('domain').text
                passages = entry.find('passages')
                if passages is None:
                    snippets = []
                else:
                    snippets = ["".join(passage.itertext()) for passage in passages]
                data['serp'].append({
                    'position': position,
                    'title': "".join(entry.find('title').itertext()),
                    'host': host,
                    'url': url,
                    'snippet': " ... ".join(snippets),
                })
        except AttributeError:
            # A find() returned None for an expected element; stop here and
            # keep the partial result.
            pass

    @tornado.web.asynchronous
    def get(self):
        """Run the search and answer with the JSON described on the class.

        Request arguments:
            query: search text (required).
            region: integer appended to the upstream URL as ``lr``
                (optional, defaults to 0).
            debug: when present, the raw upstream body is included under
                the ``xml`` key.

        Raises:
            tornado.web.HTTPError: 400 when ``region`` is not an integer.
            A missing ``query`` raises from ``get_argument``.
        """
        # Arguments are validated here, before the greenlet is spawned, so
        # that a bad request raises from the handler itself rather than
        # inside the background task.
        query = self.get_argument('query')
        try:
            region = int(self.get_argument('region', 0))
        except ValueError:
            raise tornado.web.HTTPError(400, 'region must be an integer')

        xmlTemplate = """<?xml version="1.0" encoding="utf-8"?>
<request>
<query>%(query)s</query>
<groupings>
<groupby groups-on-page="100" docs-in-group="1" />
</groupings>
</request>
"""
        payload = xmlTemplate % {'query': escape(query)}

        def complete(response):
            self.incStats('yandex', 'xml')
            data = {'count': -1, 'serp': []}
            if self.get_argument('debug', None) is not None:
                data['xml'] = response.body

            root = self._xmlParseBody(response.body)
            if root is not None:
                code = self._xmlErrorCode(root)
                if code is not None:
                    # Codes 32 and 42 on the primary account trigger a
                    # 30-minute switch to the reserved account and a retry.
                    # NOTE(review): presumably quota / key errors - confirm
                    # against the Yandex XML error code list.
                    if self._primary and code in (32, 42):
                        self.setTimeout(1800)
                        return self.get()
                    # Code 15 is reported to the client as zero results.
                    if code == 15:
                        data['count'] = 0
                    self._xmlReply(data)
                    return
                self._xmlCollectResults(root, data)
            self._xmlReply(data)

        def sync_task():
            params = self.getXmlSettings()
            url = 'http://%(host)s/xmlsearch?user=%(user)s&key=%(key)s' % params
            url += '&lr=%d' % region
            request = HTTPRequest(url, method='POST', body=payload)
            AsyncHTTPClient().fetch(request, complete)

        gevent.spawn(sync_task)