RE_EMAIL1 re compile -_ a-z0-9 -_ a-z0-9 a-z 10 re RE_EMAIL2 re compil

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
# Plain-text address: a local part, an "@" that may be surrounded by
# whitespace, one or more dot-terminated domain labels and a 2-10 letter
# final label.  Matching is case-insensitive.
RE_EMAIL1 = re.compile(
    r'([-_.a-z0-9]+\s*@\s*(?:[-_.a-z0-9]+\.)+[a-z]{2,10})',
    re.IGNORECASE,
)
# Target of a "mailto:" link: everything after the scheme up to the first
# double quote, "?", single quote, backslash, ">", "&" or space.
RE_EMAIL2 = re.compile(r'mailto:([^"?\'\\>& ]+)', flags=re.IGNORECASE)
def parse_emails(grab):
    """Collect the e-mail addresses found in the response body of *grab*.

    The body is read from ``grab.response.unicode_body()`` and passed
    through ``decode_entities`` (defined outside this block; presumably it
    decodes HTML entities -- confirm against its definition).  The text is
    then scanned twice:

    * ``RE_EMAIL1`` finds plain addresses, tolerating whitespace around
      the ``@`` sign; that whitespace is removed from the result.
    * ``RE_EMAIL2`` finds ``mailto:`` targets, which are URL-unquoted and
      split on ``;`` so a link naming several recipients yields each one.

    Returns a list of the unique address strings.  The list is built from
    a set, so its order is arbitrary.
    """
    # ``unquote`` lives in ``urllib.parse`` on Python 3 and directly in
    # ``urllib`` on Python 2; support both.
    try:
        from urllib.parse import unquote
    except ImportError:
        from urllib import unquote

    body = decode_entities(grab.response.unicode_body())
    emails = set()

    for match in RE_EMAIL1.findall(body):
        # ``\s*`` in the pattern also matches tabs and newlines, so drop
        # every whitespace character rather than only literal spaces.
        emails.add(''.join(match.split()))

    for target in RE_EMAIL2.findall(body):
        # The pattern never captures a literal space, so the only
        # whitespace to trim is whatever unquoting produces (e.g. "%20").
        for address in unquote(target).split(';'):
            address = address.strip()
            if address:  # skip empty pieces from "a@b.c;" or ";;"
                emails.add(address)

    return list(emails)