# Levenshtein_distance

 ``` 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71``` ```# -*- coding: utf-8 -*- def distanceLev(a, b, \ add_cost=10, \ del_cost=10, \ change_cost=5, \ change_modifier=1, \ normalise = 0): "Calculates the Levenshtein distance between a and b." change_modifier = 2 n, m = len(a), len(b) if n > m: # Make sure n <= m, to use O(min(n,m)) space a, b = b, a n, m = m, n current_row = range(n+1) # Keep current and previous row, not entire matrix for i in xrange(1, m+1): previous_row = current_row current_row = [i]+[0]*m for j in xrange(1,n+1): add = previous_row[j] + add_cost delete = current_row[j-1] + del_cost change = previous_row[j-1] if a[j-1] != b[i-1]: change += comparer(a[j-1],b[i-1])*change_modifier + change_cost current_row[j] = min(add, delete, change) #for pseudo normalised version summ = ( abs(normalise-1) or len(a)*len(b) ) return 1.*current_row[n]/summ def comparer(a,b,diff=1,mod=1): """Returns difference of 'a' and 'b' if (diff=0) or 'a' and 'b' are not numeric just checks if they are equal 'mod=1' causes function to return abs() of result""" try: if type(a) == type(b) == type('str'): res = ord(a) - ord(b) else: res = (a-b) except: res = (a==b) if mod: try: res = abs(res) except: print a,b,'are not numberic' if not diff: res = (a==b) return (res) s = '123456' print s print 'Changes\t\t\tErrorness' print 'not changed\t\t', distanceLev(s,'123456') print 'Not all symbols(-1)\t', distanceLev(s,'12345') print 'Not all symbols(-2)\t', distanceLev(s,'1234') print 'Not all symbols(-3)\t', distanceLev(s,'123') print 'Not all symbols(-4)\t', distanceLev(s,'12') print 'Not all symbols(-5)\t', distanceLev(s,'1') print 'Removed all symbols\t', distanceLev(s,'') print 'changed +1\t\t', distanceLev(s,'123556') print 'changed +2\t\t', distanceLev(s,'123656') print 'changed +1,+1\t\t', distanceLev(s,'124556') print 'Some extra symbols\t', distanceLev(s,'12344556') print 'More extra symbols\t', distanceLev(s,'123445556') ```