^_^

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
# -*- coding: utf-8 -*-
def distanceLev(a, b, \
add_cost=1, \
del_cost=0.5, \
change_cost=0.5, \
change_modif=1.0, \
partial_koeff=0.7,\
extra_koeff=0.5, \
k_error = 1): #error triger koeff
"""Calculates the Levenshtein distance between a and b.
Stops working if current error is bigger then add_cost*len(a)*k_error
costs must be lesser then 1"""
if add_cost>1 or del_cost>1 or change_cost>1:
raise ValueError, \
'add_cost, del_cost, change_cost+change_miduf must be lesser then 0'
n, m = len(a), len(b)
maximum = n * add_cost * k_error
koeff = 100./(n*add_cost)
full = 0
if n == 0:
res = m * del_cost
elif m == 0:
res = n * add_cost
elif a in b:
res = (m-n) * del_cost * extra_koeff
elif b in a:
res = (n-m) * add_cost * partial_koeff
if n > m:
# Make sure n <= m, to use O(min(n,m)) space
a, b = b, a
n, m = m, n
add_cost, del_cost = del_cost, add_cost
if m==0 and n==0:
res = 0;
elif m == 0 or n == 0 or (a in b) or (b in a):
pass
else:
current_row = range(n+1) # Keep current and previous row, not entire matrix
for i in xrange(1, m+1):
if full: break
previous_row = current_row
current_row = [i]+[0]*m
for j in xrange(1,n+1):
add = previous_row[j] + add_cost
delete = current_row[j-1] + del_cost
change = previous_row[j-1]
if a[j-1] != b[i-1]:
x,y = a[j-1],b[i-1]
change += comparer(x,y,normer)*change_modif + change_cost
current_row[j] = min(add, delete, change)
if (current_row[i] >= maximum):
full = 1
current_row[n] = current_row[i]
break
res = current_row[n]
return koeff * res
def comparer(a,b,diff=1,mod=1,relative=1,normer=1):
"""Returns difference of 'a' and 'b'
if (diff=0) or 'a' and 'b' are not numeric just checks if they are equal
'mod=1' causes function to return abs() of result"""
normer += 1
try:
if type(a) == type(b) == type('str'):
a,b = ord(a),ord(b)
res = (a-b)
k = a
except:
res = (a==b)
k = 1
if mod:
try: res = abs(res)
except: print a,b,'are not numberic'
if not diff:
res = (a==b)
if relative:
res = 1.*res/k
return (res)
s = '123456'
print s
print 'Changes\t\t\t Errorness'
print 'not changed\t\t %.2f' % distanceLev(s,'123456')
for i in xrange(len(s)):
print 'Not all symbols(-' +str(i+1) + ')\t %.2f' % distanceLev(s,s[0:-i-1])
print 'changed +2\t\t %.2f' % distanceLev(s,'123656')
print 'changed +4\t\t %.2f' % distanceLev(s,'123856')
print 'changed +5\t\t %.2f' % distanceLev(s,'123956')
print 'changed +1,+1\t\t %.2f' % distanceLev(s,'124556')
print '+2 symbols inside\t %.2f' % distanceLev(s,'12344556')
print '+3 symbols inside\t %.2f' % distanceLev(s,'123445556')
print '+2 symbols outside\t %.2f' % distanceLev(s,'123456dj')
print '+3 symbols outside\t %.2f' % distanceLev(s,'123456djb')
print 'doubles\t\t\t %.2f' % distanceLev(s,'112233445566')
print 'triples\t\t\t %.2f' % distanceLev(s,'111222333444555666')