Talisman bot log parser (encoding: utf-8). Usage: python parser.py directory_name

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# -*- encoding: utf-8 -*-
# Talisman bot log parser.
# usage: python parser.py directory_name
import math
import os
import re
import sys
if __name__ == '__main__':
    # Count messages per nickname across every log file in the directory
    # named by the first command-line argument, then print a ranking with
    # each nickname's share of the total.
    # NOTE: this script targets Python 2 (it relies on the `unicode` builtin).
    if len(sys.argv) < 2:
        # A missing argument used to surface as a bare IndexError.
        sys.exit('usage: python parser.py directory_name')
    working_dir = os.path.abspath(sys.argv[1])

    # Captures the text between "&lt;" and "&gt;" inside a <span> whose
    # class attribute value begins with "self".
    nick_re = re.compile(r'<span\s+class\s*=\s*"self.*">&lt;(.+)&gt;</span>')

    # nicknames.txt is read from the current working directory and is used
    # as a mapping: a nickname found in the logs -> the name to count it under.
    # SECURITY: eval() executes whatever expression the file contains; only
    # run this script with a nicknames.txt you trust.
    with open('nicknames.txt') as cfg:
        nicknames = eval(cfg.read())

    users = {}
    for filename in os.listdir(working_dir):
        # os.path.join instead of a hard-coded '\\' so the script also works
        # on platforms whose path separator is not a backslash.
        path = os.path.join(working_dir, filename)
        if not os.path.isfile(path):
            # os.listdir also returns sub-directories; open() would fail on them.
            continue
        with open(path, 'r') as log_file:
            for line in log_file:
                nick_match = nick_re.search(line)
                if not nick_match:
                    continue
                # NOTE(review): in Python 2, unicode() on a byte string decodes
                # as ASCII, so a non-ASCII nickname raises UnicodeDecodeError --
                # confirm the logs' encoding before changing this.
                nick = unicode(nick_match.group(1))
                # Fold known aliases into one name; unknown nicks count as-is.
                nick = nicknames.get(nick, nick)
                users[nick] = users.get(nick, 0) + 1

    total_msgs = sum(users.values())
    # Most active first; ties keep the dictionary's iteration order.
    ranking = sorted(users.items(), key=lambda item: item[1], reverse=True)
    for name, val in ranking:
        print('{0:6d} : {2:5.2f}% : {1}'.format(val, name, round(float(val) / total_msgs * 100, 2)))
    print('-' * 30)
    print('{0:6d} : : {1} users'.format(total_msgs, len(users)))