| 1234567891011121314151617181920212223242526272829303132 |
- from .shingle_based import ShingleBased
- from .string_distance import NormalizedStringDistance, MetricStringDistance
- from .string_similarity import NormalizedStringSimilarity
- class Jaccard(ShingleBased, MetricStringDistance, NormalizedStringDistance, NormalizedStringSimilarity):
- def __init__(self, k):
- super().__init__(k)
- def distance(self, s0, s1):
- 1.0 - self.similarity(s0, s1)
- def similarity(self, s0, s1):
- if s0 is None:
- raise TypeError("Argument s0 is NoneType.")
- if s1 is None:
- raise TypeError("Argument s1 is NoneType.")
- if s0 == s1:
- return 1.0
- if len(s0) < self.get_k() or len(s1) < self.get_k():
- return 0.0
- profile0 = self.get_profile(s0)
- profile1 = self.get_profile(s1)
- union = set()
- for ite in profile0.keys():
- union.add(ite)
- for ite in profile1.keys():
- union.add(ite)
- inter = int(len(profile0.keys()) + len(profile1.keys()) - len(union))
- return 1.0 * inter / len(union)
|