Python
10 Dec 2010
 
 
 
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
def parse_published_field(entry, post):
    """Fill ``post.published`` from the date fields of a feed ``entry``.

    The fields ``published``, ``created``, ``updated`` and ``modified`` are
    tried in that order.  The first truthy ``<field>_parsed`` attribute (a
    time tuple) wins and is stored as a naive datetime carrying exactly the
    calendar fields of that tuple.

    If ``post.published`` is still unset, the first truthy raw (unparsed)
    field is kept in ``post.published_unparsed`` and ``post.published`` gets
    a placeholder: 1 Jan 1970 when ``feed.parsed_with_success`` is false,
    otherwise the current local time.

    Raises:
        ProcessingError: if ``post.published`` is still falsy afterwards.
    """
    # Function-scope import so this block does not depend on the file's
    # import section, which is not visible from here.
    import calendar

    time_fields = ['published', 'created', 'updated', 'modified']
    epoch = datetime.datetime(1970, 1, 1)

    for key in time_fields:
        value = getattr(entry, '%s_parsed' % key, None)
        if value:
            # Do not round-trip through time.mktime()/fromtimestamp(): that
            # interprets the tuple in the local zone, which shifts the result
            # by an hour when tm_isdst is 0 while local DST is in effect and
            # fails for dates outside the platform's time_t range.
            # timegm() + epoch arithmetic keeps the tuple's fields as given.
            # (Presumably these are feedparser UTC tuples -- confirm.)
            seconds = calendar.timegm(value)
            post.published = epoch + datetime.timedelta(seconds=seconds)
            break

    if not post.published:
        for key in time_fields:
            value = getattr(entry, key, None)
            if value:
                # NOTE(review): `feed` is neither a parameter nor a local; it
                # has to come from an enclosing/module scope not shown here.
                if not feed.parsed_with_success:
                    post.published = epoch
                else:
                    # Placeholder in local time, as before.
                    post.published = datetime.datetime.now()
                # Keep the raw string so it can be inspected or re-parsed.
                post.published_unparsed = value
                break

    if not post.published:
        raise ProcessingError(
            'Could not parse published date of entry %s' % entry.link)
def parse_content_fields(entry, post):
    """Populate the HTML and plain-text body/quote fields of ``post``.

    The body is taken from ``entry.content``, preferring the ``text/html``
    item, then ``application/xhtml+xml``, then the first content item.
    ``entry.summary`` and then ``entry.description`` replace it when they
    are strictly longer than the body chosen so far.

    The body and the slice ``body[:QUOTE_SIZE]`` are each parsed with
    ``fromstring``; the serialized markup goes to ``post.body_html`` /
    ``post.quote_html`` and the extracted text to ``post.body_text`` /
    ``post.quote_text``.
    """
    body = ''

    contents = getattr(entry, 'content', None)
    if contents:
        mapping = dict((x.type, x.value) for x in contents)
        if 'text/html' in mapping:
            body = mapping['text/html']
        elif 'application/xhtml+xml' in mapping:
            body = mapping['application/xhtml+xml']
        else:
            # Take the first item in its original order; the former
            # mapping.values()[0] picked an arbitrary one and raised
            # IndexError on an empty content list.
            body = contents[0].value

    # Prefer a longer summary/description.  The lengths are compared with
    # each other (the old code compared a length against the string itself),
    # and each field supplies its own text (the description branch used to
    # assign entry.summary).
    for field in ('summary', 'description'):
        candidate = getattr(entry, field, None)
        if candidate and len(candidate) > len(body):
            body = candidate

    # NOTE(review): fromstring/tostring are presumably lxml.html's; if so an
    # empty body makes fromstring raise -- confirm how callers handle entries
    # without any content.
    lxml_body = fromstring(body)
    # The quote is cut from the raw markup before parsing, so the slice may
    # end inside a tag; the parser is relied on to cope with that.
    lxml_quote = fromstring(body[:QUOTE_SIZE])

    post.body_html = tostring(lxml_body, encoding='utf-8')
    post.quote_html = tostring(lxml_quote, encoding='utf-8')
    post.body_text = unicode(lxml_body.text_content())
    post.quote_text = unicode(lxml_quote.text_content())