| 
					
				 | 
			
			
				@@ -29,6 +29,7 @@ def parse_args(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         help="Path to the output file/directory (empty for display).") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     parser.add_argument("-i", "--input", default="-", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         help="Path to the input file (- for stdin).") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    parser.add_argument("-f", "--input-format", default="yaml", choices=["yaml", "pb"]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     parser.add_argument("--text-size", default=12, type=int, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         help="Size of the labels and legend.") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     parser.add_argument("--backend", help="Matplotlib backend to use.") 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -56,29 +57,122 @@ def parse_args(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     return args 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+class Reader(object): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def read(self, file): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        raise NotImplementedError 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def get_name(self): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        raise NotImplementedError 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def get_header(self): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        raise NotImplementedError 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def get_project_burndown(self): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        raise NotImplementedError 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def get_files_burndown(self): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        raise NotImplementedError 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def get_people_burndown(self): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        raise NotImplementedError 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def get_ownership_burndown(self): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        raise NotImplementedError 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def get_people_interaction(self): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        raise NotImplementedError 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def get_files_coocc(self): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        raise NotImplementedError 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def get_people_coocc(self): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        raise NotImplementedError 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+class YamlReader(Reader): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def read(self, file): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        yaml.reader.Reader.NON_PRINTABLE = re.compile(r"(?!x)x") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            loader = yaml.CLoader 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        except AttributeError: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            print("Warning: failed to import yaml.CLoader, falling back to slow yaml.Loader") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            loader = yaml.Loader 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if file != "-": 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                with open(file) as fin: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    data = yaml.load(fin, Loader=loader) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                data = yaml.load(sys.stdin, Loader=loader) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        except (UnicodeEncodeError, yaml.reader.ReaderError) as e: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            print("\nInvalid unicode in the input: %s\nPlease filter it through " 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                  "fix_yaml_unicode.py" % e) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            sys.exit(1) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        print("done") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        self.data = data 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def get_name(self): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        return next(iter(self.data["project"])) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def get_header(self): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        header = self.data["burndown"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        return header["begin"], header["end"], header["sampling"], header["granularity"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def get_project_burndown(self): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        name, matrix = next(iter(self.data["project"].items())) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        return name, self._parse_burndown_matrix(matrix).T 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def get_files_burndown(self): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        return [(p[0], self._parse_burndown_matrix(p[1]).T) for p in self.data["files"].items()] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def get_people_burndown(self): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        return [(p[0], self._parse_burndown_matrix(p[1]).T) for p in self.data["people"].items()] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def get_ownership_burndown(self): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        return self.data["people_sequence"], {p[0]: self._parse_burndown_matrix(p[1]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                              for p in self.data["people"].items()} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def get_people_interaction(self): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        return self.data["people_sequence"], self._parse_burndown_matrix(self.data["people_interaction"]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def get_files_coocc(self): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        coocc = self.data["files_coocc"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        return coocc["index"], self._parse_coocc_matrix(coocc["matrix"]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def get_people_coocc(self): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        coocc = self.data["people_coocc"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        return coocc["index"], self._parse_coocc_matrix(coocc["matrix"]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def _parse_burndown_matrix(self, matrix): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        return numpy.array([numpy.fromstring(line, dtype=int, sep=" ") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            for line in matrix.split("\n")]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def _parse_coocc_matrix(self, matrix): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        from scipy.sparse import csr_matrix 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        data = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        indices = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        indptr = [0] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        for row in matrix: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            for k, v in sorted(row.items()): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                data.append(v) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                indices.append(k) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            indptr.append(indptr[-1] + len(row)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        return csr_matrix((data, indices, indptr), shape=(len(matrix),) * 2) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+class ProtobufReader(Reader): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def read(self, file): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        pass 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+READERS = {"yaml": YamlReader, "pb": ProtobufReader} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def read_input(args): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     sys.stdout.write("Reading the input... ") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     sys.stdout.flush() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    yaml.reader.Reader.NON_PRINTABLE = re.compile(r"(?!x)x") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        loader = yaml.CLoader 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    except AttributeError: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        print("Warning: failed to import yaml.CLoader, falling back to slow yaml.Loader") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        loader = yaml.Loader 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if args.input != "-": 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            with open(args.input) as fin: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                data = yaml.load(fin, Loader=loader) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            data = yaml.load(sys.stdin, Loader=loader) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    except (UnicodeEncodeError, yaml.reader.ReaderError) as e: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        print("\nInvalid unicode in the input: %s\nPlease filter it through fix_yaml_unicode.py" % 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-              e) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        sys.exit(1) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    print("done") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    return data["burndown"], data["project"], data.get("files"), data.get("people_sequence"), \ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-           data.get("people"), data.get("people_interaction"), data.get("files_coocc"), \ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-           data.get("people_coocc") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    reader = READERS[args.input_format]() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    reader.read(args.input) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    return reader 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def calculate_average_lifetime(matrix): 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -95,19 +189,12 @@ def calculate_average_lifetime(matrix): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             / (lifetimes.sum() * matrix.shape[1])) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-def load_main(header, name, matrix, resample): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+def load_burndown(header, name, matrix, resample): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     import pandas 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    start = header["begin"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    last = header["end"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    granularity = header["granularity"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    sampling = header["sampling"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    start = datetime.fromtimestamp(int(start)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    last = datetime.fromtimestamp(int(last)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    granularity = int(granularity) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    sampling = int(sampling) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    matrix = numpy.array([numpy.fromstring(line, dtype=int, sep=" ") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                          for line in matrix.split("\n")]).T 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    start, last, sampling, granularity = header 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    start = datetime.fromtimestamp(start) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    last = datetime.fromtimestamp(last) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     print(name, "lifetime index:", calculate_average_lifetime(matrix)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     finish = start + timedelta(days=matrix.shape[1] * sampling) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     if resample not in ("no", "raw"): 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -184,25 +271,15 @@ def load_main(header, name, matrix, resample): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     return name, matrix, date_range_sampling, labels, granularity, sampling, resample 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-def load_churn_matrix(contents): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    matrix = numpy.array([numpy.fromstring(line, dtype=int, sep=" ") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                          for line in contents.split("\n")]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    return matrix 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def load_people(header, sequence, contents): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     import pandas 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    start = header["begin"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    last = header["end"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    sampling = header["sampling"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    start = datetime.fromtimestamp(int(start)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    last = datetime.fromtimestamp(int(last)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    sampling = int(sampling) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    start, last, sampling, _ = header 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    start = datetime.fromtimestamp(start) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    last = datetime.fromtimestamp(last) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     people = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     for name in sequence: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        people.append(numpy.array([numpy.fromstring(line, dtype=int, sep=" ") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                                   for line in contents[name].split("\n")]).sum(axis=1)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        people.append(contents[name].sum(axis=1)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     people = numpy.array(people) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     date_range_sampling = pandas.date_range( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         start + timedelta(days=sampling), periods=people[0].shape[0], 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -327,13 +404,13 @@ def plot_burndown(args, target, name, matrix, date_range_sampling, labels, granu 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def plot_many(args, target, header, parts): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     if not args.output: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         print("Warning: output not set, showing %d plots." % len(parts)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    itercnt = progress.bar(parts.items(), expected_size=len(parts)) \ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if progress is not None else parts.items() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    itercnt = progress.bar(parts, expected_size=len(parts)) \ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if progress is not None else parts 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     stdout = io.StringIO() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     for name, matrix in itercnt: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         backup = sys.stdout 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         sys.stdout = stdout 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        plot_burndown(args, target, *load_main(header, name, matrix, args.resample)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        plot_burndown(args, target, *load_burndown(header, name, matrix, args.resample)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         sys.stdout = backup 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     sys.stdout.write(stdout.getvalue()) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -420,15 +497,15 @@ def plot_people(args, repo, names, people, date_range, last): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     deploy_plot("%s code ownership through time" % repo, output, args.style) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-def train_embeddings(coocc_tree, tmpdir, shard_size=4096): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    from scipy.sparse import csr_matrix 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+def train_embeddings(index, matrix, tmpdir, shard_size=4096): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         from . import swivel 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     except (SystemError, ImportError): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         import swivel 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     import tensorflow as tf 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    index = coocc_tree["index"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    assert matrix.shape[0] == matrix.shape[1] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    assert len(index) <= matrix.shape[0] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     nshards = len(index) // shard_size 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     if nshards * shard_size < len(index): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         nshards += 1 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -436,23 +513,12 @@ def train_embeddings(coocc_tree, tmpdir, shard_size=4096): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         nshards = len(index) // shard_size 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     remainder = len(index) - nshards * shard_size 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     if remainder > 0: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        lengths = numpy.array([len(cd) for cd in coocc_tree["matrix"]]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        lengths = matrix.indptr[1:] - matrix.indptr[:-1] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         filtered = sorted(numpy.argsort(lengths)[remainder:]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         filtered = list(range(len(index))) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    print("Reading the sparse matrix...") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    data = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    indices = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    indptr = [0] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    for row, cd in enumerate(coocc_tree["matrix"]): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if row >= len(index): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            break 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        for col, val in sorted(cd.items()): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            data.append(val) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            indices.append(col) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        indptr.append(indptr[-1] + len(cd)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    matrix = csr_matrix((data, indices, indptr), shape=(len(index), len(index))) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    if len(filtered) < len(index): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    if len(filtered) < matrix.shape[0]: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        print("Truncating the sparse matrix...") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         matrix = matrix[filtered, :][:, filtered] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     meta_index = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     for i, j in enumerate(filtered): 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -617,62 +683,71 @@ def write_embeddings(name, output, run_server, index, embeddings): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def main(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     args = parse_args() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    header, main_contents, files_contents, people_sequence, people_contents, people_matrix, \ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        files_coocc, people_coocc = read_input(args) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    name = next(iter(main_contents)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    reader = read_input(args) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    header = reader.get_header() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    name = reader.get_name() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     files_warning = "Files stats were not collected. Re-run hercules with -files." 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     people_warning = "People stats were not collected. Re-run hercules with -people." 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     couples_warning = "Coupling stats were not collected. Re-run hercules with -couples." 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    if args.mode == "project": 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def project_burndown(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         plot_burndown(args, "project", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                      *load_main(header, name, main_contents[name], args.resample)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    elif args.mode == "file": 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if not files_contents: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                      *load_burndown(header, *reader.get_project_burndown(), args.resample)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def files_burndown(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            plot_many(args, "file", header, reader.get_files_burndown()) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        except KeyError: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             print(files_warning) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            return 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        plot_many(args, "file", header, files_contents) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    elif args.mode == "person": 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if not people_contents: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def people_burndown(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            plot_many(args, "person", header, reader.get_people_burndown()) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        except KeyError: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             print(people_warning) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            return 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        plot_many(args, "person", header, people_contents) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    elif args.mode == "churn_matrix": 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if not people_contents: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def churn_matrix(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            plot_churn_matrix(args, name, *reader.get_people_interaction()) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        except KeyError: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             print(people_warning) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            return 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        plot_churn_matrix(args, name, people_sequence, load_churn_matrix(people_matrix)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    elif args.mode == "people": 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if not people_contents: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def ownership_burndown(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            plot_people(args, name, *load_people(header, *reader.get_ownership_burndown())) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        except KeyError: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             print(people_warning) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            return 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        plot_people(args, name, *load_people(header, people_sequence, people_contents)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    elif args.mode == "couples": 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if not files_coocc or not people_coocc: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            print(couples_warning) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            return 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        write_embeddings("files", args.output, not args.disable_projector, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                         *train_embeddings(files_coocc, args.couples_tmp_dir)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        write_embeddings("people", args.output, not args.disable_projector, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                         *train_embeddings(people_coocc, args.couples_tmp_dir)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    elif args.mode == "all": 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        plot_burndown(args, "project", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                      *load_main(header, name, main_contents[name], args.resample)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if files_contents: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            plot_many(args, "file", header, files_contents) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if people_contents: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            plot_many(args, "person", header, people_contents) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            plot_churn_matrix(args, name, people_sequence, load_churn_matrix(people_matrix)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            plot_people(args, name, *load_people(header, people_sequence, people_contents)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if people_coocc: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            if not files_coocc or not people_coocc: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                print(couples_warning) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                return 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def couples(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             write_embeddings("files", args.output, not args.disable_projector, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                             *train_embeddings(files_coocc, args.couples_tmp_dir)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                             *train_embeddings(*reader.get_files_coocc(), args.couples_tmp_dir)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             write_embeddings("people", args.output, not args.disable_projector, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                             *train_embeddings(people_coocc, args.couples_tmp_dir)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                             *train_embeddings(*reader.get_people_coocc(), args.couples_tmp_dir)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        except KeyError: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            print(couples_warning) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    if args.mode == "project": 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        project_burndown() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    elif args.mode == "file": 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        files_burndown() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    elif args.mode == "person": 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        people_burndown() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    elif args.mode == "churn_matrix": 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        churn_matrix() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    elif args.mode == "people": 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        ownership_burndown() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    elif args.mode == "couples": 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        couples() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    elif args.mode == "all": 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        project_burndown() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        files_burndown() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        people_burndown() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        churn_matrix() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        ownership_burndown() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        couples() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     if web_server.running: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         print("Sleeping for 60 seconds, safe to Ctrl-C") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         try: 
			 |