weighted_levenshtein_test.py 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
  1. # Copyright (c) 2018 luozhouyang
  2. #
  3. # Permission is hereby granted, free of charge, to any person obtaining a copy
  4. # of this software and associated documentation files (the "Software"), to deal
  5. # in the Software without restriction, including without limitation the rights
  6. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  7. # copies of the Software, and to permit persons to whom the Software is
  8. # furnished to do so, subject to the following conditions:
  9. #
  10. # The above copyright notice and this permission notice shall be included in all
  11. # copies or substantial portions of the Software.
  12. #
  13. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  18. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  19. # SOFTWARE.
  20. import unittest
  21. from .weighted_levenshtein import WeightedLevenshtein, CharacterSubstitutionInterface
  22. class CharSub(CharacterSubstitutionInterface):
  23. def cost(self, c0, c1):
  24. return 1.0
  25. class TestWeightedLevenshtein(unittest.TestCase):
  26. def test_weighted_levenshtein(self):
  27. a = WeightedLevenshtein(character_substitution=CharSub())
  28. s0 = ""
  29. s1 = ""
  30. s2 = "上海"
  31. s3 = "上海市"
  32. distance_format = "distance: {:.4}\t between {} and {}"
  33. print(distance_format.format(str(a.distance(s0, s1)), s0, s1))
  34. print(distance_format.format(str(a.distance(s0, s2)), s0, s2))
  35. print(distance_format.format(str(a.distance(s0, s3)), s0, s3))
  36. print(distance_format.format(str(a.distance(s1, s2)), s1, s2))
  37. print(distance_format.format(str(a.distance(s1, s3)), s1, s3))
  38. print(distance_format.format(str(a.distance(s2, s3)), s2, s3))
  39. if __name__ == "__main__":
  40. unittest.main()