| 12345678910111213141516171819202122232425 |
- import re
# Matches any run of whitespace; get_profile() deletes these runs before
# cutting the text into shingles.
_SPACE_PATTERN = re.compile("\\s+")


class ShingleBased:
    """Build k-shingle (character k-gram) frequency profiles of strings."""

    def __init__(self, k=3):
        """Remember the shingle length ``k`` (defaults to 3).

        ``k`` is stored as given; it is not validated here.
        """
        self.k = k

    def get_k(self):
        """Return the shingle length currently stored on this instance."""
        return self.k

    def get_profile(self, string):
        """Count every overlapping ``k``-character window of ``string``.

        All whitespace is removed from ``string`` first, so shingles never
        contain whitespace and may span what were originally separate words.

        Returns:
            A plain ``dict`` mapping each shingle to its number of
            occurrences, keyed in order of first appearance. For a positive
            ``k`` the dict is empty when the whitespace-free text is shorter
            than ``k`` characters.
        """
        compact = _SPACE_PATTERN.sub("", string)
        width = self.k
        counts = {}
        # The last valid window starts at len(compact) - width; a shorter
        # text yields an empty range and therefore an empty profile.
        for start in range(len(compact) - width + 1):
            piece = compact[start:start + width]
            counts[piece] = counts.get(piece, 0) + 1
        return counts
|