Explorar o código

Add basic outlier filtering in couples

Vadim Markovtsev hai 7 anos
pai
achega
c7914a0032
Modificouse 1 ficheiro con 2 adicións e 0 borrados
  1. 2 0
      labours.py

+ 2 - 0
labours.py

@@ -614,6 +614,8 @@ def train_embeddings(index, matrix, tmpdir, shard_size=IDEAL_SHARD_SIZE):
 
     assert matrix.shape[0] == matrix.shape[1]
     assert len(index) <= matrix.shape[0]
+    outlier_threshold = numpy.percentile(matrix.data, 99)
+    matrix.data[matrix.data > outlier_threshold] = outlier_threshold
     nshards = len(index) // shard_size
     if nshards * shard_size < len(index):
         nshards += 1