def getMeta(self):
global detector
title = keywords = descriptions = ''
data = self.data
try:
codepage=re.findall("""charset=([a-zA-Z0-9\-]+)""", self.CONTENT_TYPE)
if codepage:
data=data.decode(codepage[0])
else:
codepage=re.findall("""charset=([a-zA-Z0-9\-]+)""", data)
if codepage:
data=data.decode(codepage[0])
else:
detector.feed(data)
detector.reset()
data=data.decode(detector.result["encoding"])
except:
try:
detector.feed(data)
detector.reset()
data=data.decode(detector.result["encoding"])
except:
cursor.execute("""delete from getmymeta where id=%s""",self.urserData['ID'])
print type(data)
try:
html = fromstring(data)
except:
print data
sys.exit()
results=html.xpath('//title')
if results:
title = results[0].text_content().title()
results=html.xpath('//meta[@name="keywords"]')
if results:
keywords = results[0].get('content')
results=html.xpath('//meta[@name="description"]')
if results:
descriptions = results[0].get('content')
title=title.encode('utf-8')
keywords=keywords.encode('utf-8')
descriptions=descriptions.encode('utf-8')
cursor.execute("update getmymeta set title=%s, keywords=%s, description=%s, iswork='1' where id=%s", (title,keywords,descriptions,self.urserData['ID']))
return
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
Traceback (most recent call last):
File "./parser.py", line 98, in ?
mcurl.fetch()
File "/var/www/wuoru/wuo.ru/www/mcurl.py", line 151, in fetch
c.poolData['CALLBACK'](c)
File "./parser.py", line 69, in getMeta
html = fromstring(data)
File "/usr/lib/python2.4/site-packages/lxml-2.2-py2.4-linux-i686.egg/lxml/html/__init__.py", line 603, in fromstring
doc = document_fromstring(html, parser=parser, base_url=base_url, **kw)
File "/usr/lib/python2.4/site-packages/lxml-2.2-py2.4-linux-i686.egg/lxml/html/__init__.py", line 511, in document_fromstring
value = etree.fromstring(html, parser, **kw)
File "lxml.etree.pyx", line 2534, in lxml.etree.fromstring (src/lxml/lxml.etree.c:51135)
File "parser.pxi", line 1514, in lxml.etree._parseMemoryDocument (src/lxml/lxml.etree.c:76076)
ValueError: Unicode strings with encoding declaration are not supported.