luozhouyang 5 anni fa
parent
commit
1d7e32198e
3 file modificati con 31 aggiunte e 1 eliminazione
  1. 9 0
      README.md
  2. 1 1
      strsimpy/__init__.py
  3. 21 0
      strsimpy/sift4_test.py

+ 9 - 0
README.md

@@ -393,7 +393,16 @@ Distance is computed as 1 - similarity.
 ### SIFT4
 SIFT4 is a general purpose string distance algorithm inspired by JaroWinkler and Longest Common Subsequence. It was developed to produce a distance measure that matches the human perception of string distance as closely as possible. Hence it takes into account elements like character substitution, character distance, longest common subsequence, etc. It was developed using experimental testing, and without a theoretical background.
 
+```python
+from strsimpy import SIFT4
+
+s = SIFT4()
 
+# result: 11.0
+s.distance('This is the first string', 'And this is another string')
+# result: 12.0
+s.distance('Lorem ipsum dolor sit amet, consectetur adipiscing elit.', 'Amet Lorm ispum dolor sit amet, consetetur adixxxpiscing elit.', maxoffset=10)
+```
 
 ## Users
 * [StringSimilarity.NET](https://github.com/feature23/StringSimilarity.NET) a .NET port of java-string-similarity

+ 1 - 1
strsimpy/__init__.py

@@ -34,7 +34,7 @@ from .sorensen_dice import SorensenDice
 from .string_distance import StringDistance
 from .string_similarity import StringSimilarity
 from .weighted_levenshtein import WeightedLevenshtein
-from .sift4 import SIFT4
+from .sift4 import SIFT4Options, SIFT4
 
 __name__ = 'strsimpy'
 __version__ = '0.1.9'

+ 21 - 0
strsimpy/sift4_test.py

@@ -0,0 +1,21 @@
+import unittest
+
+from .sift4 import SIFT4
+
+
+class SIFT4Test(unittest.TestCase):
+
+    def testSIFT4(self):
+        s = SIFT4()
+        
+        results = [
+            ('This is the first string', 'And this is another string', 5, 11.0),
+            ('Lorem ipsum dolor sit amet, consectetur adipiscing elit.', 'Amet Lorm ispum dolor sit amet, consetetur adixxxpiscing elit.', 10, 12.0)
+        ]
+
+        for a, b, offset, res in results:
+            self.assertEquals(res, s.distance(a, b, maxoffset=offset))
+
+
+if __name__ == "__main__":
+    unittest.main()