
Add renames detection

Vadim Markovtsev, 8 years ago
Commit 53d4a048b0
3 changed files with 397 additions and 170 deletions
  1. analyser.go (+236 −29)
  2. cmd/hercules/main.go (+144 −141)
  3. file.go (+17 −0)

analyser.go (+236 −29)

@@ -7,20 +7,23 @@ import (
 	"fmt"
 	"io"
 	"os"
+	"sort"
 	"time"
 	"unicode/utf8"
 
 	"github.com/sergi/go-diff/diffmatchpatch"
 	"gopkg.in/src-d/go-git.v4"
+	"gopkg.in/src-d/go-git.v4/plumbing"
 	"gopkg.in/src-d/go-git.v4/plumbing/object"
 	"gopkg.in/src-d/go-git.v4/utils/merkletrie"
 )
 
 type Analyser struct {
-	Repository  *git.Repository
-	Granularity int
-	Sampling    int
-	OnProgress  func(int, int)
+	Repository          *git.Repository
+	Granularity         int
+	Sampling            int
+	SimilarityThreshold int
+	OnProgress          func(int, int)
 }
 
 func checkClose(c io.Closer) {
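
The new SimilarityThreshold field is a percentage from 0 to 100; Analyse panics on any other value (see the check added further down). A minimal construction sketch, mirroring what cmd/hercules/main.go does after this commit (repo is assumed to be an already-opened *git.Repository):

    analyser := hercules.Analyser{
        Repository:          repo,
        Granularity:         30,
        Sampling:            30,
        SimilarityThreshold: 90, // like git's -M90%: blobs must be at least 90% similar
    }
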
@@ -58,11 +61,9 @@ func str(file *object.Blob) string {
 }
 
 func (analyser *Analyser) handleInsertion(
-	change *object.Change, day int, status map[int]int64, files map[string]*File) {
-	blob, err := analyser.Repository.BlobObject(change.To.TreeEntry.Hash)
-	if err != nil {
-		panic(err)
-	}
+	change *object.Change, day int, status map[int]int64, files map[string]*File,
+	cache *map[plumbing.Hash]*object.Blob) {
+	blob := (*cache)[change.To.TreeEntry.Hash]
 	lines, err := loc(blob)
 	if err != nil {
 		return
@@ -77,11 +78,9 @@ func (analyser *Analyser) handleInsertion(
 }
 
 func (analyser *Analyser) handleDeletion(
-	change *object.Change, day int, status map[int]int64, files map[string]*File) {
-	blob, err := analyser.Repository.BlobObject(change.From.TreeEntry.Hash)
-	if err != nil {
-		panic(err)
-	}
+	change *object.Change, day int, status map[int]int64, files map[string]*File,
+	cache *map[plumbing.Hash]*object.Blob) {
+	blob := (*cache)[change.From.TreeEntry.Hash]
 	lines, err := loc(blob)
 	if err != nil {
 		return
@@ -93,22 +92,17 @@ func (analyser *Analyser) handleDeletion(
 }
 
 func (analyser *Analyser) handleModification(
-	change *object.Change, day int, status map[int]int64, files map[string]*File) {
-	blob_from, err := analyser.Repository.BlobObject(change.From.TreeEntry.Hash)
-	if err != nil {
-		panic(err)
-	}
-	blob_to, err := analyser.Repository.BlobObject(change.To.TreeEntry.Hash)
-	if err != nil {
-		panic(err)
-	}
+	change *object.Change, day int, status map[int]int64, files map[string]*File,
+	cache *map[plumbing.Hash]*object.Blob) {
+	blob_from := (*cache)[change.From.TreeEntry.Hash]
+	blob_to := (*cache)[change.To.TreeEntry.Hash]
 	// we are not validating UTF-8 here because for example
 	// git/git 4f7770c87ce3c302e1639a7737a6d2531fe4b160 fetch-pack.c is invalid UTF-8
 	str_from := str(blob_from)
 	str_to := str(blob_to)
 	file, exists := files[change.From.Name]
 	if !exists {
-		analyser.handleInsertion(change, day, status, files)
+		analyser.handleInsertion(change, day, status, files, cache)
 		return
 	}
 	// possible rename
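
All three handlers now read blobs from a cache built once per commit instead of each calling Repository.BlobObject and panicking on failure. One aside: Go maps are reference types, so passing map[plumbing.Hash]*object.Blob by value already shares the underlying storage; the *map[...] indirection is not strictly needed. A tiny self-contained illustration of that property:

    package main

    import "fmt"

    // fill mutates the map it receives; the caller observes the change
    // because a map value is a small header pointing at shared storage.
    func fill(m map[string]int) { m["a"] = 1 }

    func main() {
        m := map[string]int{}
        fill(m)
        fmt.Println(m["a"]) // prints 1
    }
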
@@ -249,6 +243,216 @@ func (analyser *Analyser) groupStatus(status map[int]int64, day int) []int64 {
 	return result
 }
 
+type sortableChange struct {
+	change *object.Change
+	hash   plumbing.Hash
+}
+
+type sortableChanges []sortableChange
+
+func (change *sortableChange) Less(other *sortableChange) bool {
+	// lexicographic comparison over the 20 hash bytes
+	for x := 0; x < 20; x++ {
+		if change.hash[x] < other.hash[x] {
+			return true
+		}
+		if change.hash[x] > other.hash[x] {
+			return false
+		}
+	}
+	return false
+}
+
+func (slice sortableChanges) Len() int {
+	return len(slice)
+}
+
+func (slice sortableChanges) Less(i, j int) bool {
+	return slice[i].Less(&slice[j])
+}
+
+func (slice sortableChanges) Swap(i, j int) {
+	slice[i], slice[j] = slice[j], slice[i]
+}
+
+type sortableBlob struct {
+	change *object.Change
+	size   int64
+}
+
+type sortableBlobs []sortableBlob
+
+func (change *sortableBlob) Less(other *sortableBlob) bool {
+	return change.size < other.size
+}
+
+func (slice sortableBlobs) Len() int {
+	return len(slice)
+}
+
+func (slice sortableBlobs) Less(i, j int) bool {
+	return slice[i].Less(&slice[j])
+}
+
+func (slice sortableBlobs) Swap(i, j int) {
+	slice[i], slice[j] = slice[j], slice[i]
+}
+
+func (analyser *Analyser) sizesAreClose(size1 int64, size2 int64) bool {
+	return abs64(size1-size2)*100/min64(size1, size2) <=
+		int64(100-analyser.SimilarityThreshold)
+}
+
+func (analyser *Analyser) blobsAreClose(
+	blob1 *object.Blob, blob2 *object.Blob) bool {
+	str_from := str(blob1)
+	str_to := str(blob2)
+	dmp := diffmatchpatch.New()
+	src, dst, _ := dmp.DiffLinesToRunes(str_from, str_to)
+	diffs := dmp.DiffMainRunes(src, dst, false)
+	common := 0
+	for _, edit := range diffs {
+		if edit.Type == diffmatchpatch.DiffEqual {
+			common += utf8.RuneCountInString(edit.Text)
+		}
+	}
+	return common*100/min(len(src), len(dst)) >=
+		analyser.SimilarityThreshold
+}
+
+func (analyser *Analyser) getBlob(hash plumbing.Hash) *object.Blob {
+	blob, err := analyser.Repository.BlobObject(hash)
+	if err != nil {
+		panic(err)
+	}
+	return blob
+}
+
+func (analyser *Analyser) cacheBlobs(changes object.Changes) *map[plumbing.Hash]*object.Blob {
+	cache := make(map[plumbing.Hash]*object.Blob)
+	for _, change := range changes {
+		action, err := change.Action()
+		if err != nil {
+			panic(err)
+		}
+		switch action {
+		case merkletrie.Insert:
+			cache[change.To.TreeEntry.Hash] = analyser.getBlob(change.To.TreeEntry.Hash)
+		case merkletrie.Delete:
+			cache[change.From.TreeEntry.Hash] = analyser.getBlob(change.From.TreeEntry.Hash)
+		case merkletrie.Modify:
+			cache[change.To.TreeEntry.Hash] = analyser.getBlob(change.To.TreeEntry.Hash)
+			cache[change.From.TreeEntry.Hash] = analyser.getBlob(change.From.TreeEntry.Hash)
+		default:
+			panic(fmt.Sprintf("unsupported action: %d", action))
+		}
+	}
+	return &cache
+}
+
+func (analyser *Analyser) detectRenames(
+	changes object.Changes, cache *map[plumbing.Hash]*object.Blob) object.Changes {
+	reduced_changes := make(object.Changes, 0, changes.Len())
+
+	// Stage 1 - find renames by matching the hashes
+	// n log(n)
+	// We sort additions and deletions by hash and then do the single scan along
+	// both slices.
+	deleted := make(sortableChanges, 0, changes.Len())
+	added := make(sortableChanges, 0, changes.Len())
+	for _, change := range changes {
+		action, err := change.Action()
+		if err != nil {
+			panic(err)
+		}
+		switch action {
+		case merkletrie.Insert:
+			added = append(added, sortableChange{change, change.To.TreeEntry.Hash})
+		case merkletrie.Delete:
+			deleted = append(deleted, sortableChange{change, change.From.TreeEntry.Hash})
+		case merkletrie.Modify:
+			reduced_changes = append(reduced_changes, change)
+		default:
+			panic(fmt.Sprintf("unsupported action: %d", action))
+		}
+	}
+	sort.Sort(deleted)
+	sort.Sort(added)
+	a := 0
+	d := 0
+	still_deleted := make(object.Changes, 0, deleted.Len())
+	still_added := make(object.Changes, 0, added.Len())
+	for a < added.Len() && d < deleted.Len() {
+		if added[a].hash == deleted[d].hash {
+			reduced_changes = append(
+				reduced_changes,
+				&object.Change{From: deleted[d].change.From, To: added[a].change.To})
+			a++
+			d++
+		} else if added[a].Less(&deleted[d]) {
+			still_added = append(still_added, added[a].change)
+			a++
+		} else {
+			still_deleted = append(still_deleted, deleted[d].change)
+			d++
+		}
+	}
+	for ; a < added.Len(); a++ {
+		still_added = append(still_added, added[a].change)
+	}
+	for ; d < deleted.Len(); d++ {
+		still_deleted = append(still_deleted, deleted[d].change)
+	}
+
+	// Stage 2 - apply the similarity threshold
+	// n^2 but actually linear
+	// We sort the blobs by size and do the single linear scan.
+	added_blobs := make(sortableBlobs, 0, still_added.Len())
+	deleted_blobs := make(sortableBlobs, 0, still_deleted.Len())
+	for _, change := range still_added {
+		blob := (*cache)[change.To.TreeEntry.Hash]
+		added_blobs = append(
+			added_blobs, sortableBlob{change: change, size: blob.Size})
+	}
+	for _, change := range still_deleted {
+		blob := (*cache)[change.From.TreeEntry.Hash]
+		deleted_blobs = append(
+			deleted_blobs, sortableBlob{change: change, size: blob.Size})
+	}
+	sort.Sort(added_blobs)
+	sort.Sort(deleted_blobs)
+	d_start := 0
+	for a = 0; a < added_blobs.Len(); a++ {
+		my_blob := (*cache)[added_blobs[a].change.To.TreeEntry.Hash]
+		my_size := added_blobs[a].size
+		for d = d_start; d < deleted_blobs.Len() && !analyser.sizesAreClose(my_size, deleted_blobs[d].size); d++ {
+		}
+		d_start = d
+		found_match := false
+		for d = d_start; d < deleted_blobs.Len() && analyser.sizesAreClose(my_size, deleted_blobs[d].size); d++ {
+			if analyser.blobsAreClose(
+				my_blob, (*cache)[deleted_blobs[d].change.From.TreeEntry.Hash]) {
+				found_match = true
+				reduced_changes = append(
+					reduced_changes,
+					&object.Change{From: deleted_blobs[d].change.From,
+						To: added_blobs[a].change.To})
+				break
+			}
+		}
+		if found_match {
+			added_blobs = append(added_blobs[:a], added_blobs[a+1:]...)
+			a--
+			deleted_blobs = append(deleted_blobs[:d], deleted_blobs[d+1:]...)
+		}
+	}
+
+	// Stage 3 - we give up; everything left is an independent addition or deletion
+	for _, blob := range added_blobs {
+		reduced_changes = append(reduced_changes, blob.change)
+	}
+	for _, blob := range deleted_blobs {
+		reduced_changes = append(reduced_changes, blob.change)
+	}
+	return reduced_changes
+}
+
 func (analyser *Analyser) Analyse(commits []*object.Commit) [][]int64 {
 	sampling := analyser.Sampling
 	if sampling == 0 {
@@ -258,6 +462,9 @@ func (analyser *Analyser) Analyse(commits []*object.Commit) [][]int64 {
 	if onProgress == nil {
 		onProgress = func(int, int) {}
 	}
+	if analyser.SimilarityThreshold < 0 || analyser.SimilarityThreshold > 100 {
+		panic("hercules.Analyser: an invalid SimilarityThreshold was specified")
+	}
 
 	// current daily alive number of lines; key is the number of days from the
 	// beginning of the history
@@ -311,6 +518,8 @@ func (analyser *Analyser) Analyse(commits []*object.Commit) [][]int64 {
 			if err != nil {
 				panic(err)
 			}
+			cache := analyser.cacheBlobs(tree_diff)
+			tree_diff = analyser.detectRenames(tree_diff, cache)
 			for _, change := range tree_diff {
 				action, err := change.Action()
 				if err != nil {
@@ -318,9 +527,9 @@ func (analyser *Analyser) Analyse(commits []*object.Commit) [][]int64 {
 				}
 				switch action {
 				case merkletrie.Insert:
-					analyser.handleInsertion(change, day, status, files)
+					analyser.handleInsertion(change, day, status, files, cache)
 				case merkletrie.Delete:
-					analyser.handleDeletion(change, day, status, files)
+					analyser.handleDeletion(change, day, status, files, cache)
 				case merkletrie.Modify:
 					func() {
 						defer func() {
@@ -330,10 +539,8 @@ func (analyser *Analyser) Analyse(commits []*object.Commit) [][]int64 {
 								panic(r)
 							}
 						}()
-						analyser.handleModification(change, day, status, files)
+						analyser.handleModification(change, day, status, files, cache)
 					}()
-				default:
-					panic(fmt.Sprintf("unsupported action: %d", change.Action))
 				}
 			}
 		}

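detectRenames pairs additions with deletions in three stages: exact matches by blob hash (both slices sorted, then one merge-style scan), approximate matches by size and then by content, and finally whatever is left stays an independent addition or deletion. sizesAreClose accepts a pair when the size difference, as a percentage of the smaller blob, is within 100 - SimilarityThreshold; note that an empty blob makes the divisor min64(size1, size2) zero, which this commit does not guard against. A standalone sketch of the same arithmetic (not the committed code):

    // With threshold 90, sizes may differ by at most 10% of the smaller blob.
    func sizesAreClose(size1, size2, threshold int64) bool {
        diff := size1 - size2
        if diff < 0 {
            diff = -diff
        }
        smaller := size1
        if size2 < smaller {
            smaller = size2
        }
        return diff*100/smaller <= 100-threshold
    }

    // sizesAreClose(1000, 1099, 90) == true  (9% apart)
    // sizesAreClose(1000, 1250, 90) == false (25% apart)

blobsAreClose applies the same threshold to content: DiffLinesToRunes maps each distinct line to a rune, DiffMainRunes diffs those rune strings, and the pair is accepted when the common lines make up at least SimilarityThreshold percent of the smaller side.
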
cmd/hercules/main.go (+144 −141)

@@ -1,152 +1,155 @@
 package main
 
 import (
-    "bufio"
-    "flag"
-    "fmt"
-    "io"
-    "os"
-    "runtime/pprof"
-    "strconv"
-    "strings"
+	"bufio"
+	"flag"
+	"fmt"
+	"io"
+	"os"
+	"runtime/pprof"
+	"strconv"
+	"strings"
 
-    "gopkg.in/src-d/go-billy.v2/osfs"
-    "gopkg.in/src-d/go-git.v4"
-    "gopkg.in/src-d/go-git.v4/storage"
-    "gopkg.in/src-d/go-git.v4/storage/memory"
-    "gopkg.in/src-d/go-git.v4/storage/filesystem"
-    "gopkg.in/src-d/go-git.v4/plumbing"
-    "gopkg.in/src-d/go-git.v4/plumbing/object"
-    "gopkg.in/src-d/hercules.v1"
+	"gopkg.in/src-d/go-billy.v2/osfs"
+	"gopkg.in/src-d/go-git.v4"
+	"gopkg.in/src-d/go-git.v4/plumbing"
+	"gopkg.in/src-d/go-git.v4/plumbing/object"
+	"gopkg.in/src-d/go-git.v4/storage"
+	"gopkg.in/src-d/go-git.v4/storage/filesystem"
+	"gopkg.in/src-d/go-git.v4/storage/memory"
+	"gopkg.in/src-d/hercules.v1"
 )
 
 func loadCommitsFromFile(path string, repository *git.Repository) []*object.Commit {
-    var file io.Reader
-    if path != "-" {
-        file, err := os.Open(path)
-        if err != nil {
-            panic(err)
-        }
-        defer file.Close()
-    } else {
-        file = os.Stdin
-    }
-    scanner := bufio.NewScanner(file)
-    commits := []*object.Commit{}
-    for scanner.Scan() {
-        hash := plumbing.NewHash(scanner.Text())
-        if len(hash) != 20 {
-            panic("invalid commit hash " + scanner.Text())
-        }
-        commit, err := repository.CommitObject(hash)
-        if err != nil {
-            panic(err)
-        }
-        commits = append(commits, commit)
-    }
-    return commits
+	var file io.Reader
+	if path != "-" {
+		opened, err := os.Open(path)
+		if err != nil {
+			panic(err)
+		}
+		defer opened.Close()
+		file = opened // assign to the outer file; := here would shadow it and leave it nil
+	} else {
+		file = os.Stdin
+	}
+	scanner := bufio.NewScanner(file)
+	commits := []*object.Commit{}
+	for scanner.Scan() {
+		hash := plumbing.NewHash(scanner.Text())
+		if len(hash) != 20 {
+			panic("invalid commit hash " + scanner.Text())
+		}
+		commit, err := repository.CommitObject(hash)
+		if err != nil {
+			panic(err)
+		}
+		commits = append(commits, commit)
+	}
+	return commits
 }
 
 func main() {
-    var profile bool
-    var granularity, sampling int
-    var commitsFile string
-    flag.BoolVar(&profile, "profile", false, "Collect the profile to hercules.pprof.")
-    flag.IntVar(&granularity, "granularity", 30, "Report granularity in days.")
-    flag.IntVar(&sampling, "sampling", 30, "Report sampling in days.")
-    flag.StringVar(&commitsFile, "commits", "", "Path to the text file with the " +
-        "commit history to follow instead of the default rev-list " +
-            "--first-parent. The format is the list of hashes, each hash on a " +
-            "separate line. The first hash is the root.")
-    flag.Parse()
-    if (granularity <= 0) {
-        fmt.Fprint(os.Stderr, "Warning: adjusted the granularity to 1 day\n")
-        granularity = 1
-    }
-    if profile {
-        prof, _ := os.Create("hercules.pprof")
-        pprof.StartCPUProfile(prof)
-        defer pprof.StopCPUProfile()
-    }
-    if len(flag.Args()) == 0 || len(flag.Args()) > 3 {
-        fmt.Fprint(os.Stderr,
-                     "Usage: hercules <path to repo or URL> [<disk cache path>]\n")
-        os.Exit(1)
-    }
-  uri := flag.Arg(0)
-    var repository *git.Repository
-    var storage storage.Storer
-    var err error
-    if strings.Contains(uri, "://") {
-        if len(flag.Args()) == 2 {
-            storage, err = filesystem.NewStorage(osfs.New(flag.Arg(1)))
-            if err != nil {
-                panic(err)
-            }
-        } else {
-            storage = memory.NewStorage()
-        }
-        fmt.Fprint(os.Stderr, "cloning...\r")
-        repository, err = git.Clone(storage, nil, &git.CloneOptions{
-            URL: uri,
-        })
-        fmt.Fprint(os.Stderr, "          \r")
-    } else {
-        if uri[len(uri) - 1] == os.PathSeparator {
-            uri = uri[:len(uri) - 1]
-        }
-        if !strings.HasSuffix(uri, ".git") {
-            uri += string(os.PathSeparator) + ".git"
-        }
-        repository, err = git.PlainOpen(uri)
-    }
-    if err != nil {
-        panic(err)
-    }
-    // core logic
-    analyser := hercules.Analyser{
-        Repository: repository,
-        OnProgress: func(commit, length int) {
-          fmt.Fprintf(os.Stderr, "%d / %d\r", commit, length)
-      },
-        Granularity: granularity,
-        Sampling: sampling,
-    }
-    // list of commits belonging to the default branch, from oldest to newest
-    // rev-list --first-parent
-    var commits []*object.Commit
-    if commitsFile == "" {
-        commits = analyser.Commits()
-    } else {
-        commits = loadCommitsFromFile(commitsFile, repository)
-    }
-    statuses := analyser.Analyse(commits)
-    fmt.Fprint(os.Stderr, "                \r")
-    if len(statuses) == 0 {
-        return
-    }
-    // determine the maximum length of each value
-    var maxnum int64
-    for _, status := range statuses {
-        for _, val := range status {
-            if val > maxnum {
-                maxnum = val
-            }
-        }
-    }
-    width := len(strconv.FormatInt(maxnum, 10))
-    last := len(statuses[len(statuses) - 1])
-    // print the start date, granularity, sampling
-    fmt.Println(commits[0].Author.When.Unix(), granularity, sampling)
-    // print the resulting triangle matrix
-    for _, status := range statuses {
-        for i := 0; i < last; i++ {
-            var val int64
-            if i < len(status) {
-                val = status[i]
-            }
-            fmt.Printf("%[1]*[2]d ", width, val)
-        }
-        fmt.Println()
-    }
+	var profile bool
+	var granularity, sampling, similarity_threshold int
+	var commitsFile string
+	flag.BoolVar(&profile, "profile", false, "Collect the profile to hercules.pprof.")
+	flag.IntVar(&granularity, "granularity", 30, "Report granularity in days.")
+	flag.IntVar(&sampling, "sampling", 30, "Report sampling in days.")
+	flag.IntVar(&similarity_threshold, "M", 90,
+		"A threshold on the similarity index used to detect renames.")
+	flag.StringVar(&commitsFile, "commits", "", "Path to the text file with the "+
+		"commit history to follow instead of the default rev-list "+
+		"--first-parent. The format is the list of hashes, each hash on a "+
+		"separate line. The first hash is the root.")
+	flag.Parse()
+	if granularity <= 0 {
+		fmt.Fprint(os.Stderr, "Warning: adjusted the granularity to 1 day\n")
+		granularity = 1
+	}
+	if profile {
+		prof, _ := os.Create("hercules.pprof")
+		pprof.StartCPUProfile(prof)
+		defer pprof.StopCPUProfile()
+	}
+	if len(flag.Args()) == 0 || len(flag.Args()) > 3 {
+		fmt.Fprint(os.Stderr,
+			"Usage: hercules <path to repo or URL> [<disk cache path>]\n")
+		os.Exit(1)
+	}
+	uri := flag.Arg(0)
+	var repository *git.Repository
+	var storage storage.Storer
+	var err error
+	if strings.Contains(uri, "://") {
+		if len(flag.Args()) == 2 {
+			storage, err = filesystem.NewStorage(osfs.New(flag.Arg(1)))
+			if err != nil {
+				panic(err)
+			}
+		} else {
+			storage = memory.NewStorage()
+		}
+		fmt.Fprint(os.Stderr, "cloning...\r")
+		repository, err = git.Clone(storage, nil, &git.CloneOptions{
+			URL: uri,
+		})
+		fmt.Fprint(os.Stderr, "          \r")
+	} else {
+		if uri[len(uri)-1] == os.PathSeparator {
+			uri = uri[:len(uri)-1]
+		}
+		if !strings.HasSuffix(uri, ".git") {
+			uri += string(os.PathSeparator) + ".git"
+		}
+		repository, err = git.PlainOpen(uri)
+	}
+	if err != nil {
+		panic(err)
+	}
+	// core logic
+	analyser := hercules.Analyser{
+		Repository: repository,
+		OnProgress: func(commit, length int) {
+			fmt.Fprintf(os.Stderr, "%d / %d\r", commit, length)
+		},
+		Granularity:         granularity,
+		Sampling:            sampling,
+		SimilarityThreshold: similarity_threshold,
+	}
+	// list of commits belonging to the default branch, from oldest to newest
+	// rev-list --first-parent
+	var commits []*object.Commit
+	if commitsFile == "" {
+		commits = analyser.Commits()
+	} else {
+		commits = loadCommitsFromFile(commitsFile, repository)
+	}
+	statuses := analyser.Analyse(commits)
+	fmt.Fprint(os.Stderr, "                \r")
+	if len(statuses) == 0 {
+		return
+	}
+	// determine the maximum length of each value
+	var maxnum int64
+	for _, status := range statuses {
+		for _, val := range status {
+			if val > maxnum {
+				maxnum = val
+			}
+		}
+	}
+	width := len(strconv.FormatInt(maxnum, 10))
+	last := len(statuses[len(statuses)-1])
+	// print the start date, granularity, sampling
+	fmt.Println(commits[0].Author.When.Unix(), granularity, sampling)
+	// print the resulting triangle matrix
+	for _, status := range statuses {
+		for i := 0; i < last; i++ {
+			var val int64
+			if i < len(status) {
+				val = status[i]
+			}
+			fmt.Printf("%[1]*[2]d ", width, val)
+		}
+		fmt.Println()
+	}
 }

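The new -M flag exposes the threshold on the command line with a default of 90, in the spirit of git's own -M/--find-renames option. For example (hypothetical repository path):

    hercules -M 80 /path/to/repo

Apart from the new flag and the SimilarityThreshold field in the Analyser literal, this file is a whitespace change: the body was reindented from spaces to tabs and the imports sorted, which is why nearly every line appears in the diff.
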
file.go (+17 −0)

@@ -9,6 +9,9 @@ type File struct {
 
 const TreeEnd int = -1
 
+// An ugly side of Go.
+// template <typename T> please!
+
 func min(a int, b int) int {
 	if a < b {
 		return a
@@ -16,6 +19,13 @@ func min(a int, b int) int {
 	return b
 }
 
+func min64(a int64, b int64) int64 {
+	if a < b {
+		return a
+	}
+	return b
+}
+
 func max(a int, b int) int {
 	if a < b {
 		return b
@@ -23,6 +33,13 @@ func max(a int, b int) int {
 	return a
 }
 
+func abs64(v int64) int64 {
+	if v <= 0 {
+		return -v
+	}
+	return v
+}
+
 func NewFile(time int, length int, status map[int]int64) *File {
 	file := new(File)
 	file.status = status
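
min64 and abs64 exist only because this code predates Go generics, hence the "template <typename T> please!" lament above them. For reference, with Go 1.18+ type parameters the int and int64 variants could collapse into one function (a modernization sketch, not part of this commit):

    // minOrdered works for any type in its constraint set.
    func minOrdered[T int | int64](a, b T) T {
        if a < b {
            return a
        }
        return b
    }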