import sys import urllib import re from htmlentitydefs import name2cod

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import sys
import urllib
import re
from htmlentitydefs import name2codepoint
def htmldecode(text):
"""Decode HTML entities in the given text."""
if type(text) is unicode:
uchr = unichr
else:
uchr = lambda value: value > 255 and unichr(value) or chr(value)
def entitydecode(match, uchr=uchr):
entity = match.group(1)
if entity.startswith('#x'):
return uchr(int(entity[2:], 16))
elif entity.startswith('#'):
return uchr(int(entity[1:]))
elif entity in name2codepoint:
return uchr(name2codepoint[entity])
else:
return match.group(0)
charrefpat = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?')
return charrefpat.sub(entitydecode, text)
def setUserAgent(userAgent):
urllib.FancyURLopener.version = userAgent
pass
def translate(text, fromLang="en", toLang="uk"):
# urllib.FancyURLopener.version = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008070400 SUSE/3.0.1-0.1 Firefox/3.0.1"
setUserAgent("Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008070400 SUSE/3.0.1-0.1 Firefox/3.0.1")
try:
post_params = urllib.urlencode({"langpair":"%s|%s" %(fromLang.lower(),toLang.lower()), "text":text,"ie":"UTF8", "oe":"UTF8"})
except KeyError, error:
print "Currently we do not support %s" %(error.args[0])
return
try:
page = urllib.urlopen("http://translate.google.com/translate_t", post_params)
content = page.read()
page.close()
match = re.search("<div id=result_box dir=\"ltr\">(.*?)</div>", content)
value = match.groups()[0]
return htmldecode(value)
except:
return 'None'