فهرست منبع

Add -files to output per-file stats

Vadim Markovtsev 8 سال پیش
والد
کامیت
9a973b2200
3 فایلهای تغییر یافته به همراه 168 افزوده شده و 61 حذف شده
  1. 89 21
      analyser.go
  2. 53 27
      cmd/hercules/main.go
  3. 26 13
      file.go

+ 89 - 21
analyser.go

@@ -149,7 +149,8 @@ func (analyser *Analyser) handleInsertion(
 	if exists {
 		panic(fmt.Sprintf("file %s already exists", name))
 	}
-	file = NewFile(day, lines, status)
+	// The second status is specific to each file.
+	file = NewFile(day, lines, status, make(map[int]int64))
 	files[name] = file
 }
 
@@ -310,7 +311,10 @@ func (analyser *Analyser) Commits() []*object.Commit {
 	return result
 }
 
-func (analyser *Analyser) groupStatus(status map[int]int64, day int) []int64 {
+func (analyser *Analyser) groupStatus(
+    status map[int]int64,
+    files map[string]*File,
+    day int) ([]int64, map[string][]int64) {
 	granularity := analyser.Granularity
 	if granularity == 0 {
 		granularity = 1
@@ -320,19 +324,70 @@ func (analyser *Analyser) groupStatus(status map[int]int64, day int) []int64 {
 	if day%granularity < granularity-1 {
 		adjust = 1
 	}
-	result := make([]int64, day/granularity+adjust)
+	global := make([]int64, day/granularity+adjust)
 	var group int64
 	for i := 0; i < day; i++ {
 		group += status[i]
 		if i%granularity == (granularity - 1) {
-			result[i/granularity] = group
+			global[i/granularity] = group
 			group = 0
 		}
 	}
 	if day%granularity < granularity-1 {
-		result[len(result)-1] = group
+		global[len(global)-1] = group
+	}
+	locals := make(map[string][]int64)
+	for key, file := range files {
+		status := make([]int64, day/granularity+adjust)
+		var group int64
+		for i := 0; i < day; i++ {
+			group += file.Status(1)[i]
+			if i%granularity == (granularity - 1) {
+				status[i/granularity] = group
+				group = 0
+			}
+		}
+		if day%granularity < granularity-1 {
+			status[len(status)-1] = group
+		}
+		locals[key] = status
 	}
-	return result
+	return global, locals
+}
+
+func (analyser *Analyser) updateHistories(
+    global_history [][]int64, global_status []int64,
+    file_histories map[string][][]int64, file_statuses map[string][]int64,
+    delta int) [][]int64 {
+	for i := 0; i < delta; i++ {
+		global_history = append(global_history, global_status)
+	}
+	to_delete := make([]string, 0)
+	for key, fh := range file_histories {
+		ls, exists := file_statuses[key]
+		if !exists {
+			to_delete = append(to_delete, key)
+		} else {
+			for i := 0; i < delta; i++ {
+				fh = append(fh, ls)
+			}
+			file_histories[key] = fh
+		}
+	}
+	for _, key := range to_delete {
+		delete(file_histories, key)
+	}
+	for key, ls := range file_statuses {
+		fh, exists := file_histories[key]
+		if exists {
+			continue
+		}
+		for i := 0; i < delta; i++ {
+			fh = append(fh, ls)
+		}
+		file_histories[key] = fh
+	}
+	return global_history
 }
 
 type sortableChange struct {
@@ -594,12 +649,13 @@ func (analyser *Analyser) detectRenames(
 // commits is a slice with the sequential commit history. It shall start from
 // the root (ascending order).
 //
-// Returns the list of snapshots of the cumulative line edit times.
+// Returns the list of snapshots of the cumulative line edit times and the
+// similar lists for every file which is alive in HEAD.
 // The number of snapshots (the first dimension >[]<[]int64) depends on
 // Analyser.Sampling (the more Sampling, the less the value); the length of
 // each snapshot depends on Analyser.Granularity (the more Granularity,
 // the less the value).
-func (analyser *Analyser) Analyse(commits []*object.Commit) [][]int64 {
+func (analyser *Analyser) Analyse(commits []*object.Commit) ([][]int64, map[string][][]int64) {
 	sampling := analyser.Sampling
 	if sampling == 0 {
 		sampling = 1
@@ -614,9 +670,11 @@ func (analyser *Analyser) Analyse(commits []*object.Commit) [][]int64 {
 
 	// current daily alive number of lines; key is the number of days from the
 	// beginning of the history
-	status := map[int]int64{}
+	global_status := map[int]int64{}
 	// weekly snapshots of status
-	statuses := [][]int64{}
+	global_history := [][]int64{}
+	// weekly snapshots of each file's status
+	file_histories := map[string][][]int64{}
 	// mapping <file path> -> hercules.File
 	files := map[string]*File{}
 
@@ -646,7 +704,7 @@ func (analyser *Analyser) Analyse(commits []*object.Commit) [][]int64 {
 					}
 					lines, err := loc(&file.Blob)
 					if err == nil {
-						files[file.Name] = NewFile(0, lines, status)
+						files[file.Name] = NewFile(0, lines, global_status, make(map[int]int64))
 					}
 				}
 			}()
@@ -659,10 +717,9 @@ func (analyser *Analyser) Analyse(commits []*object.Commit) [][]int64 {
 			delta := (day / sampling) - (prev_day / sampling)
 			if delta > 0 {
 				prev_day = day
-				gs := analyser.groupStatus(status, day)
-				for i := 0; i < delta; i++ {
-					statuses = append(statuses, gs)
-				}
+				gs, fss := analyser.groupStatus(global_status, files, day)
+				global_history = analyser.updateHistories(
+					global_history, gs, file_histories, fss, delta)
 			}
 			tree_diff, err := object.DiffTree(prev_tree, tree)
 			if err != nil {
@@ -683,9 +740,9 @@ func (analyser *Analyser) Analyse(commits []*object.Commit) [][]int64 {
 				}
 				switch action {
 				case merkletrie.Insert:
-					analyser.handleInsertion(change, day, status, files, cache)
+					analyser.handleInsertion(change, day, global_status, files, cache)
 				case merkletrie.Delete:
-					analyser.handleDeletion(change, day, status, files, cache)
+					analyser.handleDeletion(change, day, global_status, files, cache)
 				case merkletrie.Modify:
 					func() {
 						defer func() {
@@ -696,14 +753,25 @@ func (analyser *Analyser) Analyse(commits []*object.Commit) [][]int64 {
 								panic(r)
 							}
 						}()
-						analyser.handleModification(change, day, status, files, cache)
+						analyser.handleModification(change, day, global_status, files, cache)
 					}()
 				}
 			}
 		}
 		prev_tree = tree
 	}
-	gs := analyser.groupStatus(status, day)
-	statuses = append(statuses, gs)
-	return statuses
+	gs, fss := analyser.groupStatus(global_status, files, day)
+	global_history = analyser.updateHistories(
+		global_history, gs, file_histories, fss, 1)
+	for key, statuses := range file_histories {
+		if len(statuses) == len(global_history) {
+			continue
+		}
+		padding := make([][]int64, len(global_history) - len(statuses))
+		for i := range padding {
+			padding[i] = make([]int64, len(global_status))
+		}
+		file_histories[key] = append(padding, statuses...)
+	}
+	return global_history, file_histories
 }

+ 53 - 27
cmd/hercules/main.go

@@ -26,6 +26,7 @@ import (
 	"gopkg.in/src-d/go-git.v4/storage/filesystem"
 	"gopkg.in/src-d/go-git.v4/storage/memory"
 	"gopkg.in/src-d/hercules.v1"
+	"sort"
 )
 
 func loadCommitsFromFile(path string, repository *git.Repository) []*object.Commit {
@@ -55,11 +56,55 @@ func loadCommitsFromFile(path string, repository *git.Repository) []*object.Comm
 	return commits
 }
 
+func printStatuses(statuses [][]int64, name string) {
+	// determine the maximum length of each value
+	var maxnum int64
+	for _, status := range statuses {
+		for _, val := range status {
+			if val > maxnum {
+				maxnum = val
+			}
+		}
+	}
+	width := len(strconv.FormatInt(maxnum, 10))
+	last := len(statuses[len(statuses)-1])
+	if name != "" {
+		fmt.Println(name)
+	}
+	// print the resulting triangle matrix
+	for _, status := range statuses {
+		for i := 0; i < last; i++ {
+			var val int64
+			if i < len(status) {
+				val = status[i]
+				// not sure why this sometimes happens...
+				// TODO(vmarkovtsev): find the root cause of tiny negative balances
+				if val < 0 {
+					val = 0
+				}
+			}
+			fmt.Printf("%[1]*[2]d ", width, val)
+		}
+		fmt.Println()
+	}
+}
+
+func sortedKeys(m map[string][][]int64) []string {
+	keys := make([]string, 0, len(m))
+	for k := range m {
+		keys = append(keys, k)
+	}
+	sort.Strings(keys)
+	return keys
+}
+
 func main() {
+	var with_files bool
 	var profile bool
 	var granularity, sampling, similarity_threshold int
 	var commitsFile string
 	var debug bool
+	flag.BoolVar(&with_files, "files", false, "Output detailed statistics per each file.")
 	flag.BoolVar(&profile, "profile", false, "Collect the profile to hercules.pprof.")
 	flag.IntVar(&granularity, "granularity", 30, "Report granularity in days.")
 	flag.IntVar(&sampling, "sampling", 30, "Report sampling in days.")
@@ -132,40 +177,21 @@ func main() {
 	} else {
 		commits = loadCommitsFromFile(commitsFile, repository)
 	}
-	statuses := analyser.Analyse(commits)
+	global_statuses, file_statuses := analyser.Analyse(commits)
 	fmt.Fprint(os.Stderr, "                \r")
-	if len(statuses) == 0 {
+	if len(global_statuses) == 0 {
 		return
 	}
-	// determine the maximum length of each value
-	var maxnum int64
-	for _, status := range statuses {
-		for _, val := range status {
-			if val > maxnum {
-				maxnum = val
-			}
-		}
-	}
-	width := len(strconv.FormatInt(maxnum, 10))
-	last := len(statuses[len(statuses)-1])
 	// print the start date, granularity, sampling
 	fmt.Println(commits[0].Author.When.Unix(),
 		commits[len(commits)-1].Author.When.Unix(),
 		granularity, sampling)
-	// print the resulting triangle matrix
-	for _, status := range statuses {
-		for i := 0; i < last; i++ {
-			var val int64
-			if i < len(status) {
-				val = status[i]
-				// not sure why this sometimes happens...
-				// TODO(vmarkovtsev): find the root cause of tiny negative balances
-				if val < 0 {
-					val = 0
-				}
-			}
-			fmt.Printf("%[1]*[2]d ", width, val)
+	printStatuses(global_statuses, "")
+	if with_files {
+		keys := sortedKeys(file_statuses)
+		for _, key := range keys {
+			fmt.Println()
+			printStatuses(file_statuses[key], key)
 		}
-		fmt.Println()
 	}
 }

+ 26 - 13
file.go

@@ -9,13 +9,13 @@ import "fmt"
 //
 // Len() returns the number of lines in File.
 //
-// Update() mutates File by introducing tree structural changes and updaing the
+// Update() mutates File by introducing tree structural changes and updating the
 // length mapping.
 //
 // Dump() writes the tree to a string and Validate() checks the tree integrity.
 type File struct {
 	tree   *RBTree
-	status map[int]int64
+	statuses []map[int]int64
 }
 
 // TreeEnd denotes the value of the last leaf in the tree.
@@ -64,6 +64,12 @@ func abs64(v int64) int64 {
 	return v
 }
 
+func (file *File) updateTime(time int, delta int) {
+	for _, status := range file.statuses {
+		status[time] += int64(delta)
+	}
+}
+
 // NewFile initializes a new instance of File struct.
 //
 // time is the starting value of the first node;
@@ -71,30 +77,30 @@ func abs64(v int64) int64 {
 // length is the starting length of the tree (the key of the second and the
 // last node);
 //
-// status is the attached interval length mapping.
-func NewFile(time int, length int, status map[int]int64) *File {
+// statuses are the attached interval length mappings.
+func NewFile(time int, length int, statuses ...map[int]int64) *File {
 	file := new(File)
-	file.status = status
+	file.statuses = statuses
 	file.tree = new(RBTree)
 	if length > 0 {
-		status[time] += int64(length)
+		file.updateTime(time, length)
 		file.tree.Insert(Item{key: 0, value: time})
 	}
 	file.tree.Insert(Item{key: length, value: TreeEnd})
 	return file
 }
 
-// NewFileFromTree is an alternative contructor for File which is used in tests.
+// NewFileFromTree is an alternative constructor for File which is used in tests.
 // The resulting tree is validated with Validate() to ensure the initial integrity.
 //
 // keys is a slice with the starting tree keys.
 //
 // vals is a slice with the starting tree values. Must match the size of keys.
 //
-// status is the attached interval length mapping.
-func NewFileFromTree(keys []int, vals []int, status map[int]int64) *File {
+// statuses are the attached interval length mappings.
+func NewFileFromTree(keys []int, vals []int, statuses ...map[int]int64) *File {
 	file := new(File)
-	file.status = status
+	file.statuses = statuses
 	file.tree = new(RBTree)
 	if len(keys) != len(vals) {
 		panic("keys and vals must be of equal length")
@@ -148,10 +154,9 @@ func (file *File) Update(time int, pos int, ins_length int, del_length int) {
 	if tree.Len() < 2 && tree.Min().Item().key != 0 {
 		panic("invalid tree state")
 	}
-	status := file.status
 	iter := tree.FindLE(pos)
 	origin := *iter.Item()
-	status[time] += int64(ins_length)
+	file.updateTime(time, ins_length)
 	if del_length == 0 {
 		// simple case with insertions only
 		if origin.key < pos || (origin.value == time && pos == 0) {
@@ -183,7 +188,7 @@ func (file *File) Update(time int, pos int, ins_length int, del_length int) {
 		if delta <= 0 {
 			break
 		}
-		status[node.value] -= int64(delta)
+		file.updateTime(node.value, -delta)
 		if node.key >= pos {
 			origin = *node
 			tree.DeleteWithIterator(iter)
@@ -239,6 +244,14 @@ func (file *File) Update(time int, pos int, ins_length int, del_length int) {
 	}
 }
 
+func (file *File) Status(index int) map[int]int64 {
+	if index < 0 || index >= len(file.statuses) {
+		panic(fmt.Sprintf("status index %d is out of bounds [0, %d)",
+		                  index, len(file.statuses)))
+	}
+	return file.statuses[index]
+}
+
 // Dump formats the underlying line interval tree into a string.
 // Useful for error messages, panic()-s and debugging.
 func (file *File) Dump() string {