# Counts adjacent pairs of gr="..." attribute values read from mini.xml and prints the tally.
a = []
b = []
c = []
h = {}
import codecs, re
p = codecs.open('mini.xml', 'r', 'utf-8')
for line in p:
g = re.search('gr=\"(.*?)\">', line)
if g != None:
a.append(g.group(1))
a = [x.replace(u'=', u',') for x in a]
for i in range(len(a)-1):
b.append(a[i])
b.append(a[i+1])
if len(b) == 2:
c.append(b)
b = []
for i,j in c:
k = i + u' ' + j
if h.has_key(k):
h[k] += 1
else:
h[k] = 1
print h