# qgram.py (1.0 KB)
  1. from .shingle_based import ShingleBased
  2. from .string_distance import StringDistance
  3. class QGram(ShingleBased, StringDistance):
  4. def __init__(self, k=3):
  5. super().__init__(k)
  6. def distance(self, s0, s1):
  7. if s0 is None:
  8. raise TypeError("Argument s0 is NoneType.")
  9. if s1 is None:
  10. raise TypeError("Argument s1 is NoneType.")
  11. if s0 == s1:
  12. return 0.0
  13. profile0 = self.get_profile(s0)
  14. profile1 = self.get_profile(s1)
  15. return self.distance_profile(profile0, profile1)
  16. @staticmethod
  17. def distance_profile(profile0, profile1):
  18. union = set()
  19. for k in profile0.keys():
  20. union.add(k)
  21. for k in profile1.keys():
  22. union.add(k)
  23. agg = 0
  24. for k in union:
  25. v0, v1 = 0, 0
  26. if profile0.get(k) is not None:
  27. v0 = int(profile0.get(k))
  28. if profile1.get(k) is not None:
  29. v1 = int(profile1.get(k))
  30. agg += abs(v0 - v1)
  31. return agg