9 年之前 · 8a60feab56
--- a/analyser.go
+++ b/analyser.go
@@ -19,12 +19,29 @@ import (
 
				 	"gopkg.in/src-d/go-git.v4/utils/merkletrie"
			
 
				 )
			
 
				 
			
 
				+// Analyser gathers the line burndown statistics for a Git repository.
			
 
				 type Analyser struct {
			
 
				+	// Repository points to the analysed Git repository struct from go-git.
			
 
				 	Repository          *git.Repository
			
 
				+	// Granularity sets the size of each band - the number of days it spans.
			
 
				+	// Smaller values provide better resolution but require more work and eat more
			
 
				+	// memory. 30 days is usually enough.
			
 
				 	Granularity         int
			
 
				+	// Sampling sets how detailed the statistic is - the size of the interval in
			
 
				+	// days between consecutive measurements. It is usually a good idea to set it
			
 
				+	// <= Granularity. Try 15 or 30.
			
 
				 	Sampling            int
			
 
				+	// SimilarityThreshold adjusts the heuristic to determine file renames.
			
 
				+	// It has the same units as cgit's -X rename-threshold or -M. Better to
			
 
				+	// set it to the default value of 90 (90%).
			
 
				 	SimilarityThreshold int
			
 
				+	// Debug activates the debugging mode. Analyse() runs slower in this mode
			
 
				+	// but it accurately checks all the intermediate states for invariant
			
 
				+	// violations.
			
 
				 	Debug               bool
			
 
				+	// OnProgress is the callback which is invoked in Analyse() to output its
			
 
				+	// progress. The first argument is the number of processed commits and the
			
 
				+	// second is the total number of commits.
			
 
				 	OnProgress          func(int, int)
			
 
				 }
			
 
				 
			
@@ -70,53 +87,53 @@ func str(file *object.Blob) string {
 
				 	return buf.String()
			
 
				 }
			
 
				 
			
 
				-type DummyIO struct {
			
 
				+type dummyIO struct {
			
 
				 }
			
 
				 
			
 
				-func (DummyIO) Read(p []byte) (int, error) {
			
 
				+func (dummyIO) Read(p []byte) (int, error) {
			
 
				 	return 0, io.EOF
			
 
				 }
			
 
				 
			
 
				-func (DummyIO) Write(p []byte) (int, error) {
			
 
				+func (dummyIO) Write(p []byte) (int, error) {
			
 
				 	return len(p), nil
			
 
				 }
			
 
				 
			
 
				-func (DummyIO) Close() error {
			
 
				+func (dummyIO) Close() error {
			
 
				 	return nil
			
 
				 }
			
 
				 
			
 
				-type DummyEncodedObject struct {
			
 
				+type dummyEncodedObject struct {
			
 
				 	FakeHash plumbing.Hash
			
 
				 }
			
 
				 
			
 
				-func (obj DummyEncodedObject) Hash() plumbing.Hash {
			
 
				+func (obj dummyEncodedObject) Hash() plumbing.Hash {
			
 
				 	return obj.FakeHash
			
 
				 }
			
 
				 
			
 
				-func (obj DummyEncodedObject) Type() plumbing.ObjectType {
			
 
				+func (obj dummyEncodedObject) Type() plumbing.ObjectType {
			
 
				 	return plumbing.BlobObject
			
 
				 }
			
 
				 
			
 
				-func (obj DummyEncodedObject) SetType(plumbing.ObjectType) {
			
 
				+func (obj dummyEncodedObject) SetType(plumbing.ObjectType) {
			
 
				 }
			
 
				 
			
 
				-func (obj DummyEncodedObject) Size() int64 {
			
 
				+func (obj dummyEncodedObject) Size() int64 {
			
 
				 	return 0
			
 
				 }
			
 
				 
			
 
				-func (obj DummyEncodedObject) SetSize(int64) {
			
 
				+func (obj dummyEncodedObject) SetSize(int64) {
			
 
				 }
			
 
				 
			
 
				-func (obj DummyEncodedObject) Reader() (io.ReadCloser, error) {
			
 
				-	return DummyIO{}, nil
			
 
				+func (obj dummyEncodedObject) Reader() (io.ReadCloser, error) {
			
 
				+	return dummyIO{}, nil
			
 
				 }
			
 
				 
			
 
				-func (obj DummyEncodedObject) Writer() (io.WriteCloser, error) {
			
 
				-	return DummyIO{}, nil
			
 
				+func (obj dummyEncodedObject) Writer() (io.WriteCloser, error) {
			
 
				+	return dummyIO{}, nil
			
 
				 }
			
 
				 
			
 
				 func createDummyBlob(hash *plumbing.Hash) (*object.Blob, error) {
			
 
				-	return object.DecodeBlob(DummyEncodedObject{*hash})
			
 
				+	return object.DecodeBlob(dummyEncodedObject{*hash})
			
 
				 }
			
 
				 
			
 
				 func (analyser *Analyser) handleInsertion(
			
@@ -265,6 +282,9 @@ func (analyser *Analyser) handleRename(from, to string, files map[string]*File)
 
				 	delete(files, from)
			
 
				 }
			
 
				 
			
 
				+// Commits returns the critical path in the repository's history. It starts
			
 
				+// from HEAD and traces commits backwards till the root. When it encounters
			
 
				+// a merge (more than one parent), it always chooses the first parent.
			
 
				 func (analyser *Analyser) Commits() []*object.Commit {
			
 
				 	result := []*object.Commit{}
			
 
				 	repository := analyser.Repository
			
@@ -569,6 +589,16 @@ func (analyser *Analyser) detectRenames(
 
				 	return reduced_changes
			
 
				 }
			
 
				 
			
 
				+// Analyse calculates the line burndown statistics for the bound repository.
			
 
				+//
			
 
				+// commits is a slice with the sequential commit history. It shall start from
			
 
				+// the root (ascending order).
			
 
				+//
			
 
				+// Returns the list of snapshots of the cumulative line edit times.
			
 
				+// The number of snapshots (the first dimension >[]<[]int64) depends on
			
 
				+// Analyser.Sampling (the more Sampling, the less the value); the length of
			
 
				+// each snapshot depends on Analyser.Granularity (the more Granularity,
			
 
				+// the less the value).
			
 
				 func (analyser *Analyser) Analyse(commits []*object.Commit) [][]int64 {
			
 
				 	sampling := analyser.Sampling
			
 
				 	if sampling == 0 {
			
--- a/cmd/hercules/main.go
+++ b/cmd/hercules/main.go
@@ -1,3 +1,9 @@
 
				+/*
			
 
				+Package main provides the command line tool to gather the line burndown
			
 
				+statistics from Git repositories. Usage:
			
 
				+
			
 
				+	hercules <URL or FS path>
			
 
				+*/
			
 
				 package main
			
 
				 
			
 
				 import (
			
--- a/doc.go
+++ b/doc.go
@@ -0,0 +1,39 @@
 
				+/*
			
 
				+Package hercules contains the functions which are needed to gather the line
			
 
				+burndown statistics from a Git repository.
			
 
				+
			
 
				+Analyser is the main object which concentrates the high level logic. It
			
 
				+provides Commits() and Analyse() methods to get the work done. The following
			
 
				+example was taken from cmd/hercules:
			
 
				+
			
 
				+	var repository *git.Repository
			
 
				+	// ... initialize repository ...
			
 
				+	analyser := hercules.Analyser{
			
 
				+		Repository: repository,
			
 
				+		OnProgress: func(commit, length int) {
			
 
				+			fmt.Fprintf(os.Stderr, "%d / %d\r", commit, length)
			
 
				+		},
			
 
				+		Granularity:         30,
			
 
				+		Sampling:            15,
			
 
				+		SimilarityThreshold: 90,
			
 
				+		Debug:               false,
			
 
				+	}
			
 
				+	commits := analyser.Commits()  // or specify a custom list
			
 
				+	statuses := analyser.Analyse(commits)
			
 
				+	// [y][x]int64 where y is the snapshot index and x is the granulated time index.
			
 
				+
			
 
				+As commented in the code, the list of commits can be any valid slice of *object.Commit.
			
 
				+The returned statuses slice of slices is a rectangular 2D matrix where
			
 
				+the number of rows equals the repository's lifetime divided by the sampling
			
 
				+value (detail factor) and the number of columns is the repository's lifetime
			
 
				+divided by the granularity value (number of bands).
			
 
				+
			
 
				+Analyser depends heavily on https://github.com/src-d/go-git and leverages the
			
 
				+diff algorithm through https://github.com/sergi/go-diff.
			
 
				+
			
 
				+Besides, hercules defines File and RBTree. These are low level data structures
			
 
				+required by Analyser. File carries an instance of RBTree and the current line
			
 
				+burndown state. RBTree implements the red-black balanced binary tree and is
			
 
				+based on https://github.com/yasushi-saito/rbtree.
			
 
				+*/
			
 
				+package hercules
			
--- a/file.go
+++ b/file.go
@@ -2,16 +2,29 @@ package hercules
 
				 
			
 
				 import "fmt"
			
 
				 
			
 
				+// File encapsulates a balanced binary tree to store line intervals and
			
 
				+// a cumulative mapping of values to the corresponding length counters. Users
			
 
				+// are not supposed to create File-s directly; instead, they should call NewFile().
			
 
				+// NewFileFromTree() is the special constructor which is useful in the tests.
			
 
				+//
			
 
				+// Len() returns the number of lines in File.
			
 
				+//
			
 
				+// Update() mutates File by introducing tree structural changes and updating the
			
 
				+// length mapping.
			
 
				+//
			
 
				+// Dump() writes the tree to a string and Validate() checks the tree integrity.
			
 
				 type File struct {
			
 
				 	tree   *RBTree
			
 
				 	status map[int]int64
			
 
				 }
			
 
				 
			
 
				+// TreeEnd denotes the value of the last leaf in the tree.
			
 
				 const TreeEnd int = -1
			
 
				 
			
 
				-// An ugly side of Go.
			
 
				+// The ugly side of Go.
			
 
				 // template <typename T> please!
			
 
				 
			
 
				+// min calculates the minimum of two int values.
			
 
				 func min(a int, b int) int {
			
 
				 	if a < b {
			
 
				 		return a
			
@@ -19,6 +32,7 @@ func min(a int, b int) int {
 
				 	return b
			
 
				 }
			
 
				 
			
 
				+// min64 calculates the minimum of two 64-bit integers.
			
 
				 func min64(a int64, b int64) int64 {
			
 
				 	if a < b {
			
 
				 		return a
			
@@ -26,6 +40,7 @@ func min64(a int64, b int64) int64 {
 
				 	return b
			
 
				 }
			
 
				 
			
 
				+// max calculates the maximum of two int values.
			
 
				 func max(a int, b int) int {
			
 
				 	if a < b {
			
 
				 		return b
			
@@ -33,6 +48,7 @@ func max(a int, b int) int {
 
				 	return a
			
 
				 }
			
 
				 
			
 
				+// max64 calculates the maximum of two 64-bit integers.
			
 
				 func max64(a int64, b int64) int64 {
			
 
				 	if a < b {
			
 
				 		return b
			
@@ -40,6 +56,7 @@ func max64(a int64, b int64) int64 {
 
				 	return a
			
 
				 }
			
 
				 
			
 
				+// abs64 calculates the absolute value of a 64-bit integer.
			
 
				 func abs64(v int64) int64 {
			
 
				 	if v <= 0 {
			
 
				 		return -v
			
@@ -47,6 +64,14 @@ func abs64(v int64) int64 {
 
				 	return v
			
 
				 }
			
 
				 
			
 
				+// NewFile initializes a new instance of File struct.
			
 
				+//
			
 
				+// time is the starting value of the first node;
			
 
				+//
			
 
				+// length is the starting length of the tree (the key of the second and the
			
 
				+// last node);
			
 
				+//
			
 
				+// status is the attached interval length mapping.
			
 
				 func NewFile(time int, length int, status map[int]int64) *File {
			
 
				 	file := new(File)
			
 
				 	file.status = status
			
@@ -59,6 +84,14 @@ func NewFile(time int, length int, status map[int]int64) *File {
 
				 	return file
			
 
				 }
			
 
				 
			
 
				+// NewFileFromTree is an alternative constructor for File which is used in tests.
			
 
				+// The resulting tree is validated with Validate() to ensure the initial integrity.
			
 
				+//
			
 
				+// keys is a slice with the starting tree keys.
			
 
				+//
			
 
				+// vals is a slice with the starting tree values. Must match the size of keys.
			
 
				+//
			
 
				+// status is the attached interval length mapping.
			
 
				 func NewFileFromTree(keys []int, vals []int, status map[int]int64) *File {
			
 
				 	file := new(File)
			
 
				 	file.status = status
			
@@ -73,10 +106,27 @@ func NewFileFromTree(keys []int, vals []int, status map[int]int64) *File {
 
				 	return file
			
 
				 }
			
 
				 
			
 
				+// Len returns the File's size - that is, the maximum key in the tree of line
			
 
				+// intervals.
			
 
				 func (file *File) Len() int {
			
 
				 	return file.tree.Max().Item().key
			
 
				 }
			
 
				 
			
 
				+// Update modifies the underlying tree to adapt to the specified line changes.
			
 
				+//
			
 
				+// time is the time when the requested changes are made. Sets the values of the
			
 
				+// inserted nodes.
			
 
				+//
			
 
				+// pos is the index of the line at which the changes are introduced.
			
 
				+//
			
 
				+// ins_length is the number of inserted lines after pos.
			
 
				+//
			
 
				+// del_length is the number of removed lines after pos. Deletions come before
			
 
				+// the insertions.
			
 
				+//
			
 
				+// The code inside this function is probably the most important one throughout
			
 
				+// the project. It is extensively covered with tests. If you find a bug, please
			
 
				+// add the corresponding case in file_test.go.
			
 
				 func (file *File) Update(time int, pos int, ins_length int, del_length int) {
			
 
				 	if time < 0 {
			
 
				 		panic("time may not be negative")
			
@@ -189,6 +239,8 @@ func (file *File) Update(time int, pos int, ins_length int, del_length int) {
 
				 	}
			
 
				 }
			
 
				 
			
 
				+// Dump formats the underlying line interval tree into a string.
			
 
				+// Useful for error messages, panic()-s and debugging.
			
 
				 func (file *File) Dump() string {
			
 
				 	buffer := ""
			
 
				 	for iter := file.tree.Min(); !iter.Limit(); iter = iter.Next() {
			
@@ -198,11 +250,20 @@ func (file *File) Dump() string {
 
				 	return buffer
			
 
				 }
			
 
				 
			
 
				+// Validate checks the underlying line interval tree integrity.
			
 
				+// The checks are as follows:
			
 
				+//
			
 
				+// 1. The minimum key must be 0 because the first line index is always 0.
			
 
				+//
			
 
				+// 2. The last node must carry TreeEnd value. This is the maintained invariant
			
 
				+// which marks the ending of the last line interval.
			
 
				+//
			
 
				+// 3. Node keys must monotonically increase and never duplicate.
			
 
				 func (file *File) Validate() {
			
 
				 	if file.tree.Min().Item().key != 0 {
			
 
				 		panic("the tree must start with key 0")
			
 
				 	}
			
 
				-	if file.tree.Max().Item().value != -1 {
			
 
				+	if file.tree.Max().Item().value != TreeEnd {
			
 
				 		panic(fmt.Sprintf("the last value in the tree must be %d", TreeEnd))
			
 
				 	}
			
 
				 	prev_key := -1
			
--- a/rbtree.go
+++ b/rbtree.go
@@ -1,12 +1,3 @@
 
				-//
			
 
				-// Created by Yaz Saito on 06/10/12.
			
 
				-//
			
 
				-
			
 
				-// A red-black tree with an API similar to C++ STL's.
			
 
				-//
			
 
				-// The implementation is inspired (read: stolen) from:
			
 
				-// http://en.literateprograms.org/Red-black_tree_(C)#chunk use:private function prototypes.
			
 
				-//
			
 
				 package hercules
			
 
				 
			
 
				 //
			
@@ -19,6 +10,14 @@ type Item struct {
 
				 	value int
			
 
				 }
			
 
				 
			
 
				+// RBTree created by Yaz Saito on 06/10/12.
			
 
				+//
			
 
				+// A red-black tree with an API similar to C++ STL's.
			
 
				+//
			
 
				+// The implementation is inspired (read: stolen) from:
			
 
				+// http://en.literateprograms.org/Red-black_tree_(C)#chunk use:private function prototypes.
			
 
				+//
			
 
				+// The code was optimized for the simple integer types of key and value.
			
 
				 type RBTree struct {
			
 
				 	// Root of the tree
			
 
				 	root *node