#!/usr/bin/python
#coding=utf-8
# format
# pyfeedfilter --title=жопа --author=Василий
# target: skipped
# Input: stdin, which must of course be in a feed format (RSS/Atom).
# Output: stdout, the same feed but containing only the entries that survived filtering.
import feedparser
import sys
import getopt
import feed_marshal
from xml.dom import minidom
#########################################################
def filter_title(entry, title, invert):
    """Tell whether an entry passes the title filter.

    Args:
        entry: parsed feed entry; its ``title`` attribute is searched.
        title: substring to look for, either UTF-8 encoded bytes (the form
            command-line arguments take under Python 2) or text.
        invert: when true, the outcome of the substring match is negated.

    Returns:
        True when the entry should be kept, False otherwise.
    """
    # Decode byte strings only: calling decode() on a value that is already
    # text fails (non-ASCII unicode on Python 2, any str on Python 3).
    if isinstance(title, bytes):
        title = title.decode('utf-8')
    # A missing title counts as an empty one instead of aborting the whole
    # run with AttributeError.
    haystack = getattr(entry, 'title', None) or u''
    return (title in haystack) ^ invert
#########################################################
def filter_text(entry, text, invert):
    """Tell whether an entry passes the description-text filter.

    Args:
        entry: parsed feed entry; its ``description`` attribute is searched.
        text: substring to look for, either UTF-8 encoded bytes (the form
            command-line arguments take under Python 2) or text.
        invert: when true, the outcome of the substring match is negated.

    Returns:
        True when the entry should be kept, False otherwise.
    """
    # Decode byte strings only: calling decode() on a value that is already
    # text fails (non-ASCII unicode on Python 2, any str on Python 3).
    if isinstance(text, bytes):
        text = text.decode('utf-8')
    # A missing description counts as an empty one instead of aborting the
    # whole run with AttributeError.
    haystack = getattr(entry, 'description', None) or u''
    return (text in haystack) ^ invert
#########################################################
def filter_author(entry, author, invert):
    """Tell whether an entry passes the author filter.

    Args:
        entry: parsed feed entry; its ``author`` attribute is searched.
        author: substring to look for, either UTF-8 encoded bytes (the form
            command-line arguments take under Python 2) or text.
        invert: when true, the outcome of the substring match is negated.

    Returns:
        True when the entry should be kept, False otherwise.
    """
    # Decode byte strings only: calling decode() on a value that is already
    # text fails (non-ASCII unicode on Python 2, any str on Python 3).
    if isinstance(author, bytes):
        author = author.decode('utf-8')
    # Entries without an author are treated as having an empty author
    # instead of aborting the whole run with AttributeError.
    haystack = getattr(entry, 'author', None) or u''
    return (author in haystack) ^ invert
#########################################################
def filter_mime_type(entry, mime_type, invert):
    """Tell whether an entry passes the enclosure MIME-type filter.

    The entry matches when at least one of its enclosures has a ``type``
    containing ``mime_type``; entries without enclosures never match.

    Args:
        entry: parsed feed entry; its ``enclosures`` sequence is inspected.
        mime_type: substring to look for in each enclosure type, either
            UTF-8 encoded bytes or text.
        invert: when true, the outcome of the match is negated.

    Returns:
        True when the entry should be kept, False otherwise.
    """
    # Decode byte strings only: calling decode() on a value that is already
    # text fails (non-ASCII unicode on Python 2, any str on Python 3).
    if isinstance(mime_type, bytes):
        mime_type = mime_type.decode('utf-8')
    # Look at every enclosure, not just the first, and tolerate entries
    # that carry none at all (previously an IndexError).
    enclosures = getattr(entry, 'enclosures', None) or ()
    matched = any(
        mime_type in (getattr(enclosure, 'type', None) or u'')
        for enclosure in enclosures
    )
    return matched ^ invert
#########################################################
def filter_mime_size_min(entry, mime_size_min, invert):
    """Tell whether an entry passes the minimum enclosure size filter.

    The entry matches when at least one of its enclosures declares a
    ``length`` greater than or equal to ``mime_size_min``.

    Args:
        entry: parsed feed entry; its ``enclosures`` sequence is inspected.
        mime_size_min: lower size bound, as an int or a numeric string.
        invert: when true, the outcome of the match is negated.

    Returns:
        True when the entry should be kept, False otherwise.

    Raises:
        ValueError: if ``mime_size_min`` is not a valid integer.
    """
    # Convert the bound once; a malformed bound is a user error and is
    # allowed to propagate.
    threshold = int(mime_size_min)
    matched = False
    # Check every enclosure, not just the first, and tolerate entries
    # that carry none at all (previously an IndexError).
    for enclosure in getattr(entry, 'enclosures', None) or ():
        try:
            length = int(getattr(enclosure, 'length', None))
        except (TypeError, ValueError):
            # Missing or non-numeric length: this enclosure cannot match.
            continue
        if length >= threshold:
            matched = True
            break
    return matched ^ invert
#########################################################
def filter_mime_size_max(entry, mime_size_max, invert):
    """Tell whether an entry passes the maximum enclosure size filter.

    The entry matches when at least one of its enclosures declares a
    ``length`` less than or equal to ``mime_size_max``.

    Args:
        entry: parsed feed entry; its ``enclosures`` sequence is inspected.
        mime_size_max: upper size bound, as an int or a numeric string.
        invert: when true, the outcome of the match is negated.

    Returns:
        True when the entry should be kept, False otherwise.

    Raises:
        ValueError: if ``mime_size_max`` is not a valid integer.
    """
    # Convert the bound once; a malformed bound is a user error and is
    # allowed to propagate.
    threshold = int(mime_size_max)
    matched = False
    # Check every enclosure, not just the first, and tolerate entries
    # that carry none at all (previously an IndexError).
    for enclosure in getattr(entry, 'enclosures', None) or ():
        try:
            length = int(getattr(enclosure, 'length', None))
        except (TypeError, ValueError):
            # Missing or non-numeric length: this enclosure cannot match.
            continue
        if length <= threshold:
            matched = True
            break
    return matched ^ invert
#########################################################
def process(feed_file, opts, filter_func, invertConditions):
    """Filter a feed and return its XML document without the rejected entries.

    The raw feed is parsed twice: with ``minidom`` to obtain the document
    that is edited and returned, and with ``feedparser`` to obtain the entry
    objects the filter functions are evaluated against.

    Args:
        feed_file: file-like object whose ``read()`` yields the feed XML.
        opts: sequence of ``(option, argument)`` pairs; pairs whose option
            has no entry in ``filter_func`` are ignored.
        filter_func: mapping from option name to a callable
            ``f(entry, argument, invert)`` returning true to keep the entry.
        invertConditions: mapping from option name to the ``invert`` flag
            passed to that option's filter.

    Returns:
        The ``minidom`` document with every rejected entry element removed.
    """
    raw = feed_file.read()
    xml = minidom.parseString(raw)
    entries = feedparser.parse(raw).entries

    # RSS names its entry elements <item>; fall back to Atom's <entry> when
    # the document contains no <item> at all.
    items = (xml.getElementsByTagName('item')
             or xml.getElementsByTagName('entry'))

    # Only options that actually have a filter take part in the decision.
    active = [(opt, arg) for opt, arg in opts if opt in filter_func]

    # Evaluate every filter before touching the document, so a failing
    # filter never leaves a half-edited tree behind.  An entry survives only
    # if all active filters accept it.
    keep_flags = [
        all(filter_func[opt](entry, arg, invertConditions[opt])
            for opt, arg in active)
        for entry in entries
    ]

    # NOTE(review): this pairs the i-th DOM element with the i-th feedparser
    # entry, i.e. it assumes both parsers report entries in document order
    # and in equal number -- confirm for unusual feeds.
    for item, keep in zip(items, keep_flags):
        if not keep:
            # Detach from the real parent: entry elements are not always
            # direct children of the first <channel> (RSS 1.0, Atom).
            item.parentNode.removeChild(item)
    return xml
#########################################################
def main():
    """Command-line entry point: filter the feed on stdin and print it to stdout.

    Parses the command-line options, prints the usage text and exits for
    ``-h``/``--help``, strips the leading ``^`` (inverse-match marker) from
    option arguments, and writes the filtered document to stdout encoded as
    UTF-8.  Exits with status 2 when the command line cannot be parsed.
    """
    usage = '''
Usage: pyfeedfilter [OPTIONS]
-h, --help this help
--title filter feed title
--text filter feed description text
--author filter feed author
--mime-type filter feed enclosure mime-type
--mime-size-min filter feed enclosure mime-type min mime size
--mime-size-max filter feed enclosure mime-type max mime size
pyfeedfilter take feed xml from stdin
for inverse match use ^ after =
'''
    try:
        options, args = getopt.getopt(
            sys.argv[1:],
            'h',
            ['help', 'title=', 'text=', 'author=',
             'mime-type=', 'mime-size-min=', 'mime-size-max='])
    except getopt.GetoptError as err:
        # Report the actual error instance (the class attribute ``msg`` is
        # always empty) and stop: continuing would hit an undefined
        # ``options`` below.
        sys.stderr.write('getopt.GetoptError: %s\n' % err)
        sys.stderr.write(usage + '\n')
        sys.exit(2)

    for opt, arg in options:
        if opt in ("-h", "--help"):
            sys.stdout.write(usage + '\n')
            sys.exit()

    filter_func = {
        '--title': filter_title,
        '--text': filter_text,
        '--author': filter_author,
        '--mime-type': filter_mime_type,
        '--mime-size-min': filter_mime_size_min,
        '--mime-size-max': filter_mime_size_max,
    }

    # Inverse matches: a leading "^" in the argument flips the filter.
    # The flag is stored per option name, so when an option is repeated the
    # last occurrence decides the inversion for all of them.
    invertCond = {}
    normalized = []
    for opt, arg in options:
        # startswith() is safe for an empty argument, unlike arg[0].
        inverted = arg.startswith('^')
        invertCond[opt] = inverted
        normalized.append((opt, arg[1:] if inverted else arg))

    # Processing.
    xmlDoc = process(sys.stdin, normalized, filter_func, invertCond)
    # toxml("UTF-8") yields encoded bytes; on Python 3 those must go to the
    # underlying binary stream, on Python 2 sys.stdout accepts them directly.
    out = getattr(sys.stdout, 'buffer', sys.stdout)
    out.write(xmlDoc.toxml("UTF-8"))
#########################################################
#########################################################
# Run the filter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()