瀏覽代碼

Add documentation

Vadim Markovtsev 7 年之前
父節點
當前提交
8a60feab56
共有 5 個文件被更改,包括 161 次插入26 次删除
  1. 45 15
      analyser.go
  2. 6 0
      cmd/hercules/main.go
  3. 39 0
      doc.go
  4. 63 2
      file.go
  5. 8 9
      rbtree.go

+ 45 - 15
analyser.go

@@ -19,12 +19,29 @@ import (
 	"gopkg.in/src-d/go-git.v4/utils/merkletrie"
 )
 
+// Analyser gathers the line burndown statistics for a Git repository.
 type Analyser struct {
+	// Repository points to the analysed Git repository struct from go-git.
 	Repository          *git.Repository
+	// Granularity sets the size of each band - the number of days it spans.
+	// Smaller values provide better resolution but require more work and eat more
+	// memory. 30 days is usually enough.
 	Granularity         int
+	// Sampling sets how detailed the statistic is - the size of the interval in
+	// days between consecutive measurements. It is usually a good idea to set it
+	// <= Granularity. Try 15 or 30.
 	Sampling            int
+	// SimilarityThreshold adjusts the heuristic to determine file renames.
+	// It has the same units as cgit's -X rename-threshold or -M. Better to
+	// set it to the default value of 90 (90%).
 	SimilarityThreshold int
+	// Debug activates the debugging mode. Analyse() runs slower in this mode
+	// but it accurately checks all the intermediate states for invariant
+	// violations.
 	Debug               bool
+	// OnProgress is the callback which is invoked in Analyse() to output its
+	// progress. The first argument is the number of processed commits and the
+	// second is the total number of commits.
 	OnProgress          func(int, int)
 }
 
@@ -70,53 +87,53 @@ func str(file *object.Blob) string {
 	return buf.String()
 }
 
-type DummyIO struct {
+type dummyIO struct {
 }
 
-func (DummyIO) Read(p []byte) (int, error) {
+func (dummyIO) Read(p []byte) (int, error) {
 	return 0, io.EOF
 }
 
-func (DummyIO) Write(p []byte) (int, error) {
+func (dummyIO) Write(p []byte) (int, error) {
 	return len(p), nil
 }
 
-func (DummyIO) Close() error {
+func (dummyIO) Close() error {
 	return nil
 }
 
-type DummyEncodedObject struct {
+type dummyEncodedObject struct {
 	FakeHash plumbing.Hash
 }
 
-func (obj DummyEncodedObject) Hash() plumbing.Hash {
+func (obj dummyEncodedObject) Hash() plumbing.Hash {
 	return obj.FakeHash
 }
 
-func (obj DummyEncodedObject) Type() plumbing.ObjectType {
+func (obj dummyEncodedObject) Type() plumbing.ObjectType {
 	return plumbing.BlobObject
 }
 
-func (obj DummyEncodedObject) SetType(plumbing.ObjectType) {
+func (obj dummyEncodedObject) SetType(plumbing.ObjectType) {
 }
 
-func (obj DummyEncodedObject) Size() int64 {
+func (obj dummyEncodedObject) Size() int64 {
 	return 0
 }
 
-func (obj DummyEncodedObject) SetSize(int64) {
+func (obj dummyEncodedObject) SetSize(int64) {
 }
 
-func (obj DummyEncodedObject) Reader() (io.ReadCloser, error) {
-	return DummyIO{}, nil
+func (obj dummyEncodedObject) Reader() (io.ReadCloser, error) {
+	return dummyIO{}, nil
 }
 
-func (obj DummyEncodedObject) Writer() (io.WriteCloser, error) {
-	return DummyIO{}, nil
+func (obj dummyEncodedObject) Writer() (io.WriteCloser, error) {
+	return dummyIO{}, nil
 }
 
 func createDummyBlob(hash *plumbing.Hash) (*object.Blob, error) {
-	return object.DecodeBlob(DummyEncodedObject{*hash})
+	return object.DecodeBlob(dummyEncodedObject{*hash})
 }
 
 func (analyser *Analyser) handleInsertion(
@@ -265,6 +282,9 @@ func (analyser *Analyser) handleRename(from, to string, files map[string]*File)
 	delete(files, from)
 }
 
+// Commits returns the critical path in the repository's history. It starts
+// from HEAD and traces commits backwards till the root. When it encounters
+// a merge (more than one parent), it always chooses the first parent.
 func (analyser *Analyser) Commits() []*object.Commit {
 	result := []*object.Commit{}
 	repository := analyser.Repository
@@ -569,6 +589,16 @@ func (analyser *Analyser) detectRenames(
 	return reduced_changes
 }
 
+// Analyse calculates the line burndown statistics for the bound repository.
+//
+// commits is a slice with the sequential commit history. It shall start from
+// the root (ascending order).
+//
+// Returns the list of snapshots of the cumulative line edit times.
+// The number of snapshots (the outer dimension of [][]int64) depends on
+// Analyser.Sampling (the larger Sampling is, the fewer snapshots); the length
+// of each snapshot depends on Analyser.Granularity (the larger Granularity
+// is, the shorter each snapshot).
 func (analyser *Analyser) Analyse(commits []*object.Commit) [][]int64 {
 	sampling := analyser.Sampling
 	if sampling == 0 {

+ 6 - 0
cmd/hercules/main.go

@@ -1,3 +1,9 @@
+/*
+Package main provides the command line tool to gather the line burndown
+statistics from Git repositories. Usage:
+
+	hercules <URL or FS path>
+*/
 package main
 
 import (

+ 39 - 0
doc.go

@@ -0,0 +1,39 @@
+/*
+Package hercules contains the functions which are needed to gather the line
+burndown statistics from a Git repository.
+
+Analyser is the main object which concentrates the high level logic. It
+provides Commits() and Analyse() methods to get the work done. The following
+example was taken from cmd/hercules:
+
+	var repository *git.Repository
+	// ... initialize repository ...
+	analyser := hercules.Analyser{
+		Repository: repository,
+		OnProgress: func(commit, length int) {
+			fmt.Fprintf(os.Stderr, "%d / %d\r", commit, length)
+		},
+		Granularity:         30,
+		Sampling:            15,
+		SimilarityThreshold: 90,
+		Debug:               false,
+	}
+	commits := analyser.Commits()  // or specify a custom list
+	statuses := analyser.Analyse(commits)
+	// [y][x]int64 where y is the snapshot index and x is the granulated time index.
+
+As commented in the code, the list of commits can be any valid slice of *object.Commit.
+The returned statuses slice of slices is a rectangular 2D matrix where
+the number of rows equals the repository's lifetime divided by the sampling
+value (detail factor) and the number of columns is the repository's lifetime
+divided by the granularity value (number of bands).
+
+Analyser depends heavily on https://github.com/src-d/go-git and leverages the
+diff algorithm through https://github.com/sergi/go-diff.
+
+Besides, hercules defines File and RBTree. These are low level data structures
+required by Analyser. File carries an instance of RBTree and the current line
+burndown state. RBTree implements the red-black balanced binary tree and is
+based on https://github.com/yasushi-saito/rbtree.
+*/
+package hercules

+ 63 - 2
file.go

@@ -2,16 +2,29 @@ package hercules
 
 import "fmt"
 
+// File encapsulates a balanced binary tree to store line intervals and
+// a cumulative mapping of values to the corresponding length counters. Users
+// are not supposed to create File instances directly; instead, they should call NewFile().
+// NewFileFromTree() is the special constructor which is useful in the tests.
+//
+// Len() returns the number of lines in File.
+//
+// Update() mutates File by introducing tree structural changes and updating the
+// length mapping.
+//
+// Dump() writes the tree to a string and Validate() checks the tree integrity.
 type File struct {
 	tree   *RBTree
 	status map[int]int64
 }
 
+// TreeEnd denotes the value of the last leaf in the tree.
 const TreeEnd int = -1
 
-// An ugly side of Go.
+// The ugly side of Go.
 // template <typename T> please!
 
+// min calculates the minimum of two platform-sized (int) integers.
 func min(a int, b int) int {
 	if a < b {
 		return a
@@ -19,6 +32,7 @@ func min(a int, b int) int {
 	return b
 }
 
+// min64 calculates the minimum of two 64-bit integers.
 func min64(a int64, b int64) int64 {
 	if a < b {
 		return a
@@ -26,6 +40,7 @@ func min64(a int64, b int64) int64 {
 	return b
 }
 
+// max calculates the maximum of two platform-sized (int) integers.
 func max(a int, b int) int {
 	if a < b {
 		return b
@@ -33,6 +48,7 @@ func max(a int, b int) int {
 	return a
 }
 
+// max64 calculates the maximum of two 64-bit integers.
 func max64(a int64, b int64) int64 {
 	if a < b {
 		return b
@@ -40,6 +56,7 @@ func max64(a int64, b int64) int64 {
 	return a
 }
 
+// abs64 calculates the absolute value of a 64-bit integer.
 func abs64(v int64) int64 {
 	if v <= 0 {
 		return -v
@@ -47,6 +64,14 @@ func abs64(v int64) int64 {
 	return v
 }
 
+// NewFile initializes a new instance of File struct.
+//
+// time is the starting value of the first node;
+//
+// length is the starting length of the tree (the key of the second and the
+// last node);
+//
+// status is the attached interval length mapping.
 func NewFile(time int, length int, status map[int]int64) *File {
 	file := new(File)
 	file.status = status
@@ -59,6 +84,14 @@ func NewFile(time int, length int, status map[int]int64) *File {
 	return file
 }
 
+// NewFileFromTree is an alternative constructor for File which is used in tests.
+// The resulting tree is validated with Validate() to ensure the initial integrity.
+//
+// keys is a slice with the starting tree keys.
+//
+// vals is a slice with the starting tree values. Must match the size of keys.
+//
+// status is the attached interval length mapping.
 func NewFileFromTree(keys []int, vals []int, status map[int]int64) *File {
 	file := new(File)
 	file.status = status
@@ -73,10 +106,27 @@ func NewFileFromTree(keys []int, vals []int, status map[int]int64) *File {
 	return file
 }
 
+// Len returns the File's size - that is, the maximum key in the tree of line
+// intervals.
 func (file *File) Len() int {
 	return file.tree.Max().Item().key
 }
 
+// Update modifies the underlying tree to adapt to the specified line changes.
+//
+// time is the time when the requested changes are made. Sets the values of the
+// inserted nodes.
+//
+// pos is the index of the line at which the changes are introduced.
+//
+// ins_length is the number of inserted lines after pos.
+//
+// del_length is the number of removed lines after pos. Deletions come before
+// the insertions.
+//
+// The code inside this function is probably the most important one throughout
+// the project. It is extensively covered with tests. If you find a bug, please
+// add the corresponding case in file_test.go.
 func (file *File) Update(time int, pos int, ins_length int, del_length int) {
 	if time < 0 {
 		panic("time may not be negative")
@@ -189,6 +239,8 @@ func (file *File) Update(time int, pos int, ins_length int, del_length int) {
 	}
 }
 
+// Dump formats the underlying line interval tree into a string.
+// Useful for error messages, panic()-s and debugging.
 func (file *File) Dump() string {
 	buffer := ""
 	for iter := file.tree.Min(); !iter.Limit(); iter = iter.Next() {
@@ -198,11 +250,20 @@ func (file *File) Dump() string {
 	return buffer
 }
 
+// Validate checks the underlying line interval tree integrity.
+// The checks are as follows:
+//
+// 1. The minimum key must be 0 because the first line index is always 0.
+//
+// 2. The last node must carry TreeEnd value. This is the maintained invariant
+// which marks the ending of the last line interval.
+//
+// 3. Node keys must monotonically increase and never duplicate.
 func (file *File) Validate() {
 	if file.tree.Min().Item().key != 0 {
 		panic("the tree must start with key 0")
 	}
-	if file.tree.Max().Item().value != -1 {
+	if file.tree.Max().Item().value != TreeEnd {
 		panic(fmt.Sprintf("the last value in the tree must be %d", TreeEnd))
 	}
 	prev_key := -1

+ 8 - 9
rbtree.go

@@ -1,12 +1,3 @@
-//
-// Created by Yaz Saito on 06/10/12.
-//
-
-// A red-black tree with an API similar to C++ STL's.
-//
-// The implementation is inspired (read: stolen) from:
-// http://en.literateprograms.org/Red-black_tree_(C)#chunk use:private function prototypes.
-//
 package hercules
 
 //
@@ -19,6 +10,14 @@ type Item struct {
 	value int
 }
 
+// RBTree created by Yaz Saito on 06/10/12.
+//
+// A red-black tree with an API similar to C++ STL's.
+//
+// The implementation is inspired (read: stolen) from:
+// http://en.literateprograms.org/Red-black_tree_(C)#chunk use:private function prototypes.
+//
+// The code was optimized for the simple integer types of key and value.
 type RBTree struct {
 	// Root of the tree
 	root *node