|
@@ -5,7 +5,8 @@ from .string_distance import NormalizedStringDistance
|
|
|
from .string_similarity import NormalizedStringSimilarity
|
|
|
|
|
|
|
|
|
-class Cosine(ShingleBased, NormalizedStringDistance, NormalizedStringSimilarity):
|
|
|
+class Cosine(ShingleBased, NormalizedStringDistance,
|
|
|
+ NormalizedStringSimilarity):
|
|
|
|
|
|
def __init__(self, k):
|
|
|
super().__init__(k)
|
|
@@ -24,10 +25,12 @@ class Cosine(ShingleBased, NormalizedStringDistance, NormalizedStringSimilarity)
|
|
|
return 0.0
|
|
|
profile0 = self.get_profile(s0)
|
|
|
profile1 = self.get_profile(s1)
|
|
|
- return self._dot_product(profile0, profile1) / (self._norm(profile0) * self._norm(profile1))
|
|
|
+ return self._dot_product(profile0, profile1) / (
|
|
|
+ self._norm(profile0) * self._norm(profile1))
|
|
|
|
|
|
def similarity_profiles(self, profile0, profile1):
|
|
|
- return self._dot_product(profile0, profile1) / (self._norm(profile0) * self._norm(profile1))
|
|
|
+ return self._dot_product(profile0, profile1) / (
|
|
|
+ self._norm(profile0) * self._norm(profile1))
|
|
|
|
|
|
@staticmethod
|
|
|
def _dot_product(profile0, profile1):
|
|
@@ -50,13 +53,3 @@ class Cosine(ShingleBased, NormalizedStringDistance, NormalizedStringSimilarity)
|
|
|
for k, v in profile.items():
|
|
|
agg += 1.0 * v * v
|
|
|
return math.sqrt(agg)
|
|
|
-
|
|
|
-
|
|
|
-if __name__ == "__main__":
|
|
|
- cosine = Cosine(1)
|
|
|
- str0 = "上海市宝山区 你好"
|
|
|
- str1 = "上海浦东新区 你好吗"
|
|
|
- d = cosine.distance(str0, str1)
|
|
|
- s = cosine.similarity(str0, str1)
|
|
|
- print(d)
|
|
|
- print(s)
|