usr bin python coding utf-8 format pyfeedfilter --title жопа --author

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#!/usr/bin/python
#coding=utf-8
# Usage format:
# pyfeedfilter --title=жопа --author=Василий
# target: skipped
# Input: a feed in RSS/Atom format, read from stdin.
# Output: the same feed on stdout, containing only the entries that passed the filters.
import feedparser
import sys
import getopt
from xml.dom import minidom
#########################################################
def filter_title(entry, title, invert):
    """Tell whether *entry* passes the title filter.

    entry  -- parsed feed entry; its ``title`` attribute is searched.
    title  -- substring to look for, either a UTF-8 byte string (how
              Python 2 delivers command-line arguments) or text.
    invert -- when true, the outcome of the match is negated.

    Returns True when the entry should be kept, False otherwise.
    """
    # Only byte strings need decoding: calling .decode() on text fails on
    # Python 3 and chokes on non-ASCII unicode on Python 2.
    if isinstance(title, bytes):
        title = title.decode('utf-8')
    # An entry without a title is treated as having an empty one instead
    # of aborting the whole run with AttributeError.
    haystack = getattr(entry, 'title', None) or u''
    return (title in haystack) ^ bool(invert)
#########################################################
def filter_text(entry, text, invert):
    """Tell whether *entry* passes the description-text filter.

    entry  -- parsed feed entry; its ``description`` attribute is searched.
    text   -- substring to look for, either a UTF-8 byte string (how
              Python 2 delivers command-line arguments) or text.
    invert -- when true, the outcome of the match is negated.

    Returns True when the entry should be kept, False otherwise.
    """
    # Only byte strings need decoding: calling .decode() on text fails on
    # Python 3 and chokes on non-ASCII unicode on Python 2.
    if isinstance(text, bytes):
        text = text.decode('utf-8')
    # An entry without a description is treated as having an empty one
    # instead of aborting the whole run with AttributeError.
    haystack = getattr(entry, 'description', None) or u''
    return (text in haystack) ^ bool(invert)
#########################################################
def filter_author(entry, author, invert):
    """Tell whether *entry* passes the author filter.

    entry  -- parsed feed entry; its ``author`` attribute is searched.
    author -- substring to look for, either a UTF-8 byte string (how
              Python 2 delivers command-line arguments) or text.
    invert -- when true, the outcome of the match is negated.

    Returns True when the entry should be kept, False otherwise.
    """
    # Only byte strings need decoding: calling .decode() on text fails on
    # Python 3 and chokes on non-ASCII unicode on Python 2.
    if isinstance(author, bytes):
        author = author.decode('utf-8')
    # Entries without an author are treated as having an empty author
    # instead of aborting the whole run with AttributeError.
    haystack = getattr(entry, 'author', None) or u''
    return (author in haystack) ^ bool(invert)
#########################################################
def filter_mime_type(entry, mime_type, invert):
    """Tell whether *entry* passes the enclosure MIME-type filter.

    An entry matches when at least one of its enclosures has a ``type``
    containing *mime_type*. Entries with no enclosures never match.

    entry     -- parsed feed entry; its ``enclosures`` sequence is inspected.
    mime_type -- substring to look for, either a UTF-8 byte string (how
                 Python 2 delivers command-line arguments) or text.
    invert    -- when true, the outcome of the match is negated.

    Returns True when the entry should be kept, False otherwise.
    """
    # Only byte strings need decoding: calling .decode() on text fails on
    # Python 3 and chokes on non-ASCII unicode on Python 2.
    if isinstance(mime_type, bytes):
        mime_type = mime_type.decode('utf-8')
    enclosures = getattr(entry, 'enclosures', None) or []
    # Every enclosure is examined, and one without a type counts as an
    # empty type rather than raising.
    matched = any(
        mime_type in (getattr(enclosure, 'type', None) or u'')
        for enclosure in enclosures
    )
    return matched ^ bool(invert)
#########################################################
def filter_mime_size_min(entry, mime_size_min, invert):
    """Tell whether *entry* passes the minimum enclosure-size filter.

    An entry matches when at least one of its enclosures has a ``length``
    greater than or equal to *mime_size_min*. Entries with no enclosures,
    or whose enclosures carry no usable length, never match.

    entry         -- parsed feed entry; its ``enclosures`` are inspected.
    mime_size_min -- lower size bound; anything ``int()`` accepts.
    invert        -- when true, the outcome of the match is negated.

    Returns True when the entry should be kept, False otherwise.
    Raises ValueError if *mime_size_min* is not a valid integer.
    """
    limit = int(mime_size_min)
    matched = False
    for enclosure in getattr(entry, 'enclosures', None) or []:
        try:
            size = int(getattr(enclosure, 'length', None))
        except (TypeError, ValueError):
            # Missing or non-numeric length: this enclosure cannot match.
            continue
        if size >= limit:
            matched = True
            break
    return matched ^ bool(invert)
#########################################################
def filter_mime_size_max(entry, mime_size_max, invert):
    """Tell whether *entry* passes the maximum enclosure-size filter.

    An entry matches when at least one of its enclosures has a ``length``
    less than or equal to *mime_size_max*. Entries with no enclosures,
    or whose enclosures carry no usable length, never match.

    entry         -- parsed feed entry; its ``enclosures`` are inspected.
    mime_size_max -- upper size bound; anything ``int()`` accepts.
    invert        -- when true, the outcome of the match is negated.

    Returns True when the entry should be kept, False otherwise.
    Raises ValueError if *mime_size_max* is not a valid integer.
    """
    limit = int(mime_size_max)
    matched = False
    for enclosure in getattr(entry, 'enclosures', None) or []:
        try:
            size = int(getattr(enclosure, 'length', None))
        except (TypeError, ValueError):
            # Missing or non-numeric length: this enclosure cannot match.
            continue
        if size <= limit:
            matched = True
            break
    return matched ^ bool(invert)
#########################################################
def process(feed_file, opts, filter_func, invertConditions):
    """Filter a feed and return its DOM with rejected entries removed.

    feed_file        -- file-like object whose ``read()`` yields the feed XML.
    opts             -- sequence of ``(option, argument)`` pairs; options
                        that are not keys of *filter_func* are ignored.
    filter_func      -- mapping of option name to a predicate called as
                        ``predicate(entry, argument, invert)``.
    invertConditions -- mapping of option name to a flag telling whether
                        that option's match is negated; absent options
                        are treated as not inverted.

    Returns the ``xml.dom.minidom`` document. An entry survives only if
    every active filter accepts it.
    """
    string = feed_file.read()
    xml = minidom.parseString(string)
    entries = feedparser.parse(string).entries

    # RSS keeps entries in <item> elements, Atom in <entry> elements.
    # NOTE(review): namespace-prefixed tags (e.g. <atom:entry>) are not
    # matched by this lookup.
    nodes = xml.getElementsByTagName('item') or xml.getElementsByTagName('entry')

    # Resolve the active filters once, binding each option's argument and
    # inversion flag explicitly instead of through a late-binding closure.
    active = [
        (filter_func[opt], arg, invertConditions.get(opt, False))
        for opt, arg in opts
        if opt in filter_func
    ]

    # Pair each DOM node with the parsed entry at the same position.
    # NOTE(review): assumes feedparser yields entries in document order,
    # as the positional removal in the previous implementation did.
    for node, entry in zip(list(nodes), entries):
        if all(check(entry, arg, invert) for check, arg, invert in active):
            continue
        # Detach via the node's real parent: entries are children of
        # <channel> in RSS 2.0 but of the root element in Atom / RSS 1.0.
        node.parentNode.removeChild(node)
    return xml
#########################################################
def main():
    """Command-line entry point: filter the feed on stdin to stdout.

    Parses the filter options from ``sys.argv``, reads the feed from
    standard input, and writes the filtered feed as UTF-8 XML to standard
    output. An argument starting with ``^`` inverts that option's match.

    Exits with status 2 after reporting a command-line error on stderr,
    and with status 0 after printing the help text.
    """
    usage = '''
Usage: pyfeedfilter [OPTIONS]
-h, --help this help
--title filter feed title
--text filter feed description text
--author filter feed author
--mime-type filter feed enclosure mime-type
--mime-size-min filter feed enclosure mime-type min mime size
--mime-size-max filter feed enclosure mime-type max mime size
pyfeedfilter take feed xml from stdin
for inverse match use ^ after =
'''
    try:
        options, args = getopt.getopt(
            sys.argv[1:],
            'h',
            ['help', 'title=', 'text=', 'author=',
             'mime-type=', 'mime-size-min=', 'mime-size-max='])
    except getopt.GetoptError as err:
        # Report the message of the caught error (not the empty class
        # attribute) on stderr, since stdout carries the feed, and stop
        # instead of continuing with `options` undefined.
        sys.stderr.write('getopt.GetoptError: %s\n' % err.msg)
        sys.stderr.write(usage)
        sys.exit(2)

    for opt, arg in options:
        if opt in ('-h', '--help'):
            print(usage)
            sys.exit()

    filter_func = {
        '--title': filter_title,
        '--text': filter_text,
        '--author': filter_author,
        '--mime-type': filter_mime_type,
        '--mime-size-min': filter_mime_size_min,
        '--mime-size-max': filter_mime_size_max,
    }

    # Split a leading "^" (inverse match) off each argument. A new list is
    # built rather than editing `options` while iterating over it, and
    # startswith() copes with empty arguments.
    # NOTE: the inversion flag is stored per option name, so when an
    # option is repeated the last occurrence decides for all of them.
    invert_cond = {}
    stripped_options = []
    for opt, arg in options:
        inverted = arg.startswith('^')
        invert_cond[opt] = inverted
        stripped_options.append((opt, arg[1:] if inverted else arg))

    # Processing. On Python 3 the text streams wrap binary ones; using the
    # binary layer lets the XML parser honour the feed's declared encoding
    # and lets the encoded result be written out unchanged.
    feed_in = getattr(sys.stdin, 'buffer', sys.stdin)
    feed_out = getattr(sys.stdout, 'buffer', sys.stdout)
    xml_doc = process(feed_in, stripped_options, filter_func, invert_cond)
    feed_out.write(xml_doc.toxml("UTF-8"))
#########################################################
#########################################################
# Run the filter only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()