# Levenshtein distance

 ``` 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96``` ```# -*- coding: utf-8 -*- def distanceLev(a, b, \ add_cost=10, \ del_cost=10, \ change_cost=5, \ change_modifier=3, \ k_error = 1): #error koeff "Calculates the Levenshtein distance between a and b." n, m = len(a), len(b) if n > m: # Make sure n <= m, to use O(min(n,m)) space a, b = b, a n, m = m, n maximum = m * max(add_cost,del_cost,change_cost) * k_error try: koeff = 1.*max(m,n) / (min(m,n)) / (m+n) except: koeff = 1 full = 0 if m==0 and n==0: res = 0; elif m == 0: res = n * add_cost elif n == 0: res = m * del_cost elif a in b: res = (m-n) * del_cost elif b in a: res = (n-m) * add_cost else: current_row = range(n+1) # Keep current and previous row, not entire matrix koeff = 1.*max(m,n) / (min(m,n)) / (m+n) for i in xrange(1, m+1): if full: break previous_row = current_row current_row = [i]+*m for j in xrange(1,n+1): add = previous_row[j] + add_cost delete = current_row[j-1] + del_cost change = previous_row[j-1] if a[j-1] != b[i-1]: change += comparer(a[j-1],b[i-1])*change_modifier + change_cost current_row[j] = min(add, delete, change) if (koeff*current_row[i] >= maximum) and (maximum != None): full = 1 current_row[n] = current_row[i] break res = current_row[n] return koeff * res * 100 / maximum / k_error def comparer(a,b,diff=1,mod=1): """Returns difference of 'a' and 'b' if (diff=0) or 'a' and 'b' are not numeric just checks if they are equal 'mod=1' causes function to return abs() of result""" try: if type(a) == type(b) == type('str'): res = ord(a) - ord(b) else: res = (a-b) except: res = (a==b) if mod: try: res = abs(res) except: print a,b,'are not numberic' if not diff: res = (a==b) return (res) s = '123456' print s print 'Changes\t\t\t Errorness' print 'not changed\t\t %.2f' % distanceLev(s,'123456') for i in xrange(len(s)): print 'Not all symbols(-' +str(i+1) + ')\t %.2f' % distanceLev(s,s[0:-i-1]) print 'changed +1\t\t %.2f' % distanceLev(s,'123556') print 'changed +2\t\t %.2f' % distanceLev(s,'123656') print 'changed +3\t\t %.2f' % distanceLev(s,'123756') print 'changed +1,+1\t\t %.2f' % distanceLev(s,'124556') print 'Some extra symbols\t %.2f' % distanceLev(s,'12344556') print 'More extra symbols\t %.2f' % distanceLev(s,'123445556') print 'doubles\t\t\t %.2f' % distanceLev(s,'112233445566') ```