damerau.py 1.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344
  1. from .string_distance import MetricStringDistance
  2. import numpy as np
  3. class Damerau(MetricStringDistance):
  4. def distance(self, s0, s1):
  5. if s0 is None:
  6. raise TypeError("Argument s0 is NoneType.")
  7. if s1 is None:
  8. raise TypeError("Argument s1 is NoneType.")
  9. if s0 == s1:
  10. return 0.0
  11. inf = int(len(s0) + len(s1))
  12. da = dict()
  13. for i in range(len(s0)):
  14. da[s0[i]] = str(0)
  15. for i in range(len(s1)):
  16. da[s1[i]] = str(0)
  17. h = np.zeros((len(s0) + 2, len(s1) + 2))
  18. for i in range(len(s0) + 1):
  19. h[i + 1][0] = inf
  20. h[i + 1][1] = i
  21. for j in range(len(s1) + 1):
  22. h[0][j + 1] = inf
  23. h[1][j + 1] = j
  24. for i in range(1, len(s0) + 1):
  25. db = 0
  26. for j in range(1, len(s1) + 1):
  27. i1 = int(da[s1[j - 1]])
  28. j1 = db
  29. cost = 1
  30. if s0[i - 1] == s1[j - 1]:
  31. cost = 0
  32. db = j
  33. h[i + 1][j + 1] = min(h[i][j] + cost,
  34. h[i + 1][j] + 1,
  35. h[i][j + 1] + 1,
  36. h[i1][j1] + (i - i1 - 1) + 1 + (j - j1 - 1))
  37. da[s0[i - 1]] = str(i)
  38. return h[len(s0) + 1][len(s1) + 1]