optimal_string_alignment.py 1.0 KB

123456789101112131415161718192021222324252627282930313233343536373839
  1. import numpy as np
  2. from .string_distance import StringDistance
  3. class OptimalStringAlignment(StringDistance):
  4. def distance(self, s0, s1):
  5. if s0 is None:
  6. raise TypeError("Argument s0 is NoneType.")
  7. if s1 is None:
  8. raise TypeError("Argument s1 is NoneType.")
  9. if s0 == s1:
  10. return 0.0
  11. n, m = len(s0), len(s1)
  12. if n == 0:
  13. return 1.0 * n
  14. if m == 0:
  15. return 1.0 * m
  16. d = np.zeros((n + 2, m + 2))
  17. for i in range(n + 1):
  18. d[i][0] = i
  19. for j in range(m + 1):
  20. d[0][j] = j
  21. for i in range(1, n + 1):
  22. for j in range(1, m + 1):
  23. cost = 1
  24. if s0[i - 1] == s1[j - 1]:
  25. cost = 0
  26. d[i][j] = min(d[i - 1][j - 1] + cost, d[i][j - 1] + 1, d[i - 1][j] + 1)
  27. if i > 1 and j > 1 and s0[i - 1] == s1[j - 2] and s0[i - 2] == s1[j - 1]:
  28. d[i][j] = min(d[i][j], d[i - 2][j - 2] + cost)
  29. return d[n][m]