longest_common_subsequence.py 1.0 KB

123456789101112131415161718192021222324252627282930313233
  1. import numpy as np
  2. from .string_distance import StringDistance
  3. class LongestCommonSubsequence(StringDistance):
  4. def distance(self, s0, s1):
  5. if s0 is None:
  6. raise TypeError("Argument s0 is NoneType.")
  7. if s1 is None:
  8. raise TypeError("Argument s1 is NoneType.")
  9. if s0 == s1:
  10. return 0.0
  11. return len(s0) + len(s1) - 2 * self.length(s0, s1)
  12. @staticmethod
  13. def length(s0, s1):
  14. if s0 is None:
  15. raise TypeError("Argument s0 is NoneType.")
  16. if s1 is None:
  17. raise TypeError("Argument s1 is NoneType.")
  18. s0_len, s1_len = len(s0), len(s1)
  19. x, y = s0[:], s1[:]
  20. n, m = s0_len + 1, s1_len + 1
  21. matrix = np.zeros((n, m))
  22. for i in range(1, s0_len + 1):
  23. for j in range(1, s1_len + 1):
  24. if x[i - 1] == y[j - 1]:
  25. matrix[i][j] = matrix[i - 1][j - 1] + 1
  26. else:
  27. matrix[i][j] = max(matrix[i][j - 1], matrix[i - 1][j])
  28. return matrix[s0_len][s1_len]