# -*- encoding: utf-8 -*- Talisman bot log parser. Usage: python parser.py directory_name

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# -*- encoding: utf-8 -*-
# Talisman bot log parser.
# usage: python parser.py directory_name
import csv
import glob
import os
import re
import sys
def find_main_name(name):
    """Map a nickname to its main name.

    Looks *name* up in the module-level ``nicknames`` mapping, whose keys
    are main names and whose values are lists of alternative nicknames.

    Args:
        name: nickname as captured from a log file.

    Returns:
        The first main name (in dictionary iteration order) whose alias
        list contains *name*, or *name* itself when no alias list does.
    """
    # ``nicknames`` is only read here, so no ``global`` statement is needed.
    # ``items()`` exists on both Python 2 and Python 3 dictionaries, whereas
    # ``iteritems()`` is Python 2 only.
    for main, others in nicknames.items():
        if name in others:
            return main
    return name
# Directory holding the log files, taken from the first command-line argument.
if len(sys.argv) < 2:
    # Fail with the usage line instead of a bare IndexError traceback.
    sys.exit('usage: python parser.py directory_name')
working_dir = os.path.abspath(sys.argv[1])

# Captures the text between "&lt;" and "&gt;" inside a <span> element whose
# class attribute value starts with "self".
nick_re = re.compile(r'<\s*span\s+class\s*=\s*"self.*?"\s*>&lt;(.+?)&gt;<\s*/\s*span\s*>')

# Main name -> list of alternative nicknames, one CSV row per main name
# (first column is the main name, remaining columns are its aliases).
# NOTE: binary mode is what the Python 2 csv module expects; Python 3's csv
# module would need text mode opened with newline=''.
nicknames = {}
with open('nicknames.csv', 'rb') as cfgfile:
    for row in csv.reader(cfgfile):
        # csv.reader yields an empty list for blank lines; skip them so
        # row[0] cannot raise IndexError.
        if row:
            nicknames[row[0]] = row[1:]

# Main name -> number of matched messages across all files in working_dir.
users = {}
for filename in glob.glob(os.path.join(working_dir, '*')):
    # glob already returns paths that include working_dir, so no further
    # joining is needed. Sub-directories cannot be opened as files; skip them.
    if not os.path.isfile(filename):
        continue
    with open(filename, 'r') as f:
        matches = nick_re.findall(f.read())
    for name in matches:
        name = find_main_name(name)
        users[name] = users.get(name, 0) + 1

total_msgs = sum(users.values())

# Report users by message count, highest first. The loop body never runs when
# no messages were found, so the division by total_msgs is safe.
for name, val in sorted(users.items(), key=lambda item: item[1], reverse=True):
    print('{0:6d} : {2:5.2f}% : {1}'.format(val, name, round(float(val) / total_msgs * 100, 2)))
print('-' * 30)
print('{0:6d} : : {1} users'.format(total_msgs, len(users)))