Browse Source

Add basic outlier filtering in couples

Vadim Markovtsev 7 years ago
parent
commit
c7914a0032
1 changed file with 2 additions and 0 deletions
  1. 2 0
      labours.py

+ 2 - 0
labours.py

@@ -614,6 +614,8 @@ def train_embeddings(index, matrix, tmpdir, shard_size=IDEAL_SHARD_SIZE):
 
     assert matrix.shape[0] == matrix.shape[1]
     assert len(index) <= matrix.shape[0]
+    outlier_threshold = numpy.percentile(matrix.data, 99)
+    matrix.data[matrix.data > outlier_threshold] = outlier_threshold
     nshards = len(index) // shard_size
     if nshards * shard_size < len(index):
         nshards += 1