
Add some docs

Vadim Markovtsev, 7 years ago
commit 534edfc9eb
4 changed files with 80 additions and 27 deletions
  1. README.md (+1, -1)
  2. blob_cache.go (+10, -0)
  3. burndown.go (+47, -22)
  4. pipeline.go (+22, -4)

README.md (+1, -1)

@@ -167,7 +167,7 @@ hercules --couples [-people-dict=/path/to/identities]
 python3 labours.py -m couples -o <name> [--couples-tmp-dir=/tmp]
 ```
 
-**Important**: it requires Tensorflow to be installed, please follow [official instuctions](https://www.tensorflow.org/install/).
+**Important**: it requires Tensorflow to be installed, please follow [official instructions](https://www.tensorflow.org/install/).
 
 The files are coupled if they are changed in the same commit. The developers are coupled if they
 change the same file. `hercules` records the number of couples throughout the whole commit history

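To make the coupling rule concrete, here is a minimal, hypothetical Go sketch (not the hercules implementation) that counts how often each pair of files is changed in the same commit; the toy history and file names are invented for illustration.

```go
package main

import "fmt"

func main() {
	// Toy history: each entry lists the files changed in one commit (made-up data).
	commits := [][]string{
		{"burndown.go", "pipeline.go"},
		{"burndown.go", "pipeline.go", "README.md"},
		{"blob_cache.go", "pipeline.go"},
	}
	// Count co-occurrences; assumes each commit's file list is consistently ordered
	// so that the pair key is stable.
	couples := map[[2]string]int{}
	for _, files := range commits {
		for i := 0; i < len(files); i++ {
			for j := i + 1; j < len(files); j++ {
				couples[[2]string{files[i], files[j]}]++
			}
		}
	}
	// burndown.go and pipeline.go were changed together in two commits.
	fmt.Println(couples[[2]string{"burndown.go", "pipeline.go"}]) // 2
}
```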
blob_cache.go (+10, -0)

@@ -11,7 +11,13 @@ import (
 	"gopkg.in/src-d/go-git.v4/utils/merkletrie"
 )
 
+// BlobCache is the PipelineItem which loads the blobs corresponding to the changed files
+// in a commit. It provides both the old and the new objects; the cache rotates so that
+// the same blobs are never loaded twice, and outdated objects are evicted so it never grows big.
 type BlobCache struct {
+	// IgnoreMissingSubmodules specifies how to handle a git submodule - an object without
+	// the blob. If false, we look inside .gitmodules and raise an error if the submodule
+	// is not listed there. If true, we do not check .gitmodules and always succeed.
 	IgnoreMissingSubmodules bool
 
 	repository *git.Repository
@@ -119,8 +125,12 @@ func (self *BlobCache) Consume(deps map[string]interface{}) (map[string]interfac
 	return map[string]interface{}{DependencyBlobCache: cache}, nil
 }
 
+// FileGetter is the type of a function which loads a Git file by the specified path.
+// The state can be arbitrary, though here it always corresponds to the currently
+// processed commit.
 type FileGetter func(path string) (*object.File, error)
 
+// getBlob returns the blob which corresponds to the specified ChangeEntry.
 func (cache *BlobCache) getBlob(entry *object.ChangeEntry, fileGetter FileGetter) (
 	*object.Blob, error) {
 	blob, err := cache.repository.BlobObject(entry.TreeEntry.Hash)

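As a usage sketch, a FileGetter bound to the currently processed commit can be built from go-git's object.Commit.File, whose signature matches. This is a hedged illustration: makeFileGetter is a made-up helper, not part of this diff.

```go
package example

import (
	"gopkg.in/src-d/go-git.v4/plumbing/object"
)

// FileGetter mirrors the type declared in blob_cache.go above.
type FileGetter func(path string) (*object.File, error)

// makeFileGetter adapts a commit into a FileGetter: the returned closure
// loads a file from that commit's tree by its path.
func makeFileGetter(commit *object.Commit) FileGetter {
	return func(path string) (*object.File, error) {
		return commit.File(path)
	}
}
```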
burndown.go (+47, -22)

@@ -20,18 +20,18 @@ import (
 )
 
 // BurndownAnalyser allows to gather the line burndown statistics for a Git repository.
+// Reference: https://erikbern.com/2016/12/05/the-half-life-of-code.html
 type BurndownAnalysis struct {
 	// Granularity sets the size of each band - the number of days it spans.
 	// Smaller values provide better resolution but require more work and eat more
 	// memory. 30 days is usually enough.
 	Granularity int
 	// Sampling sets how detailed is the statistic - the size of the interval in
-	// days between consecutive measurements. It is usually a good idea to set it
-	// <= Granularity. Try 15 or 30.
+	// days between consecutive measurements. It may not be greater than Granularity. Try 15 or 30.
 	Sampling int
 
 	// TrackFiles enables or disables the fine-grained per-file burndown analysis.
-	// It does not change the top level burndown results.
+	// It does not change the project level burndown results.
 	TrackFiles bool
 
 	// The number of developers for which to collect the burndown stats. 0 disables it.
@@ -47,11 +47,11 @@ type BurndownAnalysis struct {
 	// globalStatus is the current daily alive number of lines; key is the number
 	// of days from the beginning of the history.
 	globalStatus map[int]int64
-	// globalHistory is the weekly snapshots of globalStatus.
+	// globalHistory is the periodic snapshots of globalStatus.
 	globalHistory [][]int64
-	// fileHistories is the weekly snapshots of each file's status.
+	// fileHistories is the periodic snapshots of each file's status.
 	fileHistories map[string][][]int64
-	// peopleHistories is the weekly snapshots of each person's status.
+	// peopleHistories is the periodic snapshots of each person's status.
 	peopleHistories [][][]int64
 	// files is the mapping <file path> -> *File.
 	files map[string]*File
@@ -68,21 +68,44 @@ type BurndownAnalysis struct {
 	reversedPeopleDict []string
 }
 
+// BurndownResult carries the result of running BurndownAnalysis - it is returned by BurndownAnalysis.Finalize().
 type BurndownResult struct {
-	GlobalHistory      [][]int64
-	FileHistories      map[string][][]int64
-	PeopleHistories    [][][]int64
-	PeopleMatrix       [][]int64
+	// [number of samples][number of bands]
+	// The number of samples depends on Sampling: the smaller the Sampling, the bigger the number.
+	// The number of bands depends on Granularity: the smaller the Granularity, the bigger the number.
+	GlobalHistory [][]int64
+	// The key is the path inside the Git repository. The value's dimensions are the same as
+	// in GlobalHistory.
+	FileHistories map[string][][]int64
+	// [number of people][number of samples][number of bands]
+	PeopleHistories [][][]int64
+	// [number of people][number of people + 2]
+	// The first element is the total number of lines added by the author.
+	// The second element is the number of removals by unidentified authors (outside reversedPeopleDict).
+	// The rest of the elements equal the number of line removals by the corresponding
+	// authors in reversedPeopleDict: 2 -> 0, 3 -> 1, etc.
+	PeopleMatrix [][]int64
+
+	// The following members are private.
+
+	// reversedPeopleDict is borrowed from IdentityDetector and becomes available after
+	// Pipeline.Initialize(facts map[string]interface{}). Thus it can be obtained via
+	// facts[FactIdentityDetectorReversedPeopleDict].
 	reversedPeopleDict []string
-	sampling           int
-	granularity        int
+	// sampling and granularity are copied from BurndownAnalysis and stored for service purposes
+	// such as merging several results together.
+	sampling    int
+	granularity int
 }
 
 const (
-	ConfigBurndownGranularity  = "Burndown.Granularity"
-	ConfigBurndownSampling     = "Burndown.Sampling"
-	ConfigBurndownTrackFiles   = "Burndown.TrackFiles"
-	ConfigBurndownTrackPeople  = "Burndown.TrackPeople"
+	ConfigBurndownGranularity = "Burndown.Granularity"
+	ConfigBurndownSampling    = "Burndown.Sampling"
+	// Measuring individual files is optional and false by default.
+	ConfigBurndownTrackFiles = "Burndown.TrackFiles"
+	// Measuring authors is optional and false by default.
+	ConfigBurndownTrackPeople = "Burndown.TrackPeople"
+	// Enables some extra debug assertions.
 	ConfigBurndownDebug        = "Burndown.Debug"
 	DefaultBurndownGranularity = 30
 )
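As a rough, hypothetical illustration of those dimensions (hercules' exact rounding and boundary handling may differ): with about 360 days of recorded history, Sampling=30 yields roughly 12 samples and Granularity=30 roughly 12 bands.

```go
package main

import "fmt"

func main() {
	// Assumed example values - not taken from a real run.
	days := 360       // length of the recorded history in days
	sampling := 30    // interval in days between snapshots (rows)
	granularity := 30 // width in days of each age band (columns)
	samples := days / sampling
	bands := days / granularity
	fmt.Printf("GlobalHistory is roughly [%d][%d]int64\n", samples, bands) // [12][12]
}
```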
@@ -221,12 +244,7 @@ func (analyser *BurndownAnalysis) Consume(deps map[string]interface{}) (map[stri
 	return nil, nil
 }
 
-// Finalize() returns the list of snapshots of the cumulative line edit times
-// and the similar lists for every file which is alive in HEAD.
-// The number of snapshots (the first dimension >[]<[]int64) depends on
-// Analyser.Sampling (the more Sampling, the less the value); the length of
-// each snapshot depends on Analyser.Granularity (the more Granularity,
-// the less the value).
+// Finalize() returns BurndownResult.
 func (analyser *BurndownAnalysis) Finalize() interface{} {
 	gs, fss, pss := analyser.groupStatus()
 	analyser.updateHistories(gs, fss, pss, 1)
@@ -446,6 +464,9 @@ func (analyser *BurndownAnalysis) MergeResults(
 	return merged
 }
 
+// mergeMatrices takes two [number of samples][number of bands] matrices,
+// resamples them to days so that they become square, sums and resamples back to the
+// least of (sampling1, sampling2) and (granularity1, granularity2).
 func mergeMatrices(m1, m2 [][]int64, granularity1, sampling1, granularity2, sampling2 int,
 	c1, c2 *CommonAnalysisResult) [][]int64 {
 	commonMerged := *c1
@@ -735,6 +756,10 @@ func checkClose(c io.Closer) {
 	}
 }
 
+// We do a hack and store the day in the first 14 bits and the author index in the last 18.
+// Strictly speaking, int can be 64-bit and then the author index occupies 32+18 bits.
+// This hack is needed to simplify the storage of values inside File-s: different values
+// can be compared directly, and for the same author they compare as days.
 func (analyser *BurndownAnalysis) packPersonWithDay(person int, day int) int {
 	if analyser.PeopleNumber == 0 {
 		return day

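A minimal sketch of that packing scheme - the day in the low 14 bits, the author index above them - written from scratch for illustration (the pack/unpack helper names are invented, not hercules functions):

```go
package main

import "fmt"

const dayBits = 14 // assumption taken from the comment above

func pack(person, day int) int { return person<<dayBits | day }
func unpackPerson(v int) int   { return v >> dayBits }
func unpackDay(v int) int      { return v & (1<<dayBits - 1) }

func main() {
	v := pack(3, 100)
	fmt.Println(unpackPerson(v), unpackDay(v)) // 3 100
	// For the same author, packed values order exactly like days:
	fmt.Println(pack(3, 100) < pack(3, 200)) // true
}
```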
pipeline.go (+22, -4)

@@ -18,6 +18,7 @@ import (
 	"gopkg.in/src-d/hercules.v3/toposort"
 )
 
+// ConfigurationOptionType represents the possible types of a ConfigurationOption's value.
 type ConfigurationOptionType int
 
 const (
@@ -29,6 +30,8 @@ const (
 	StringConfigurationOption
 )
 
+// String() returns an empty string for the boolean type, "int" for integers and "string" for
+// strings. It is used in the command line interface to show the argument's type.
 func (opt ConfigurationOptionType) String() string {
 	switch opt {
 	case BoolConfigurationOption:
@@ -47,7 +50,7 @@ type ConfigurationOption struct {
 	Name string
 	// Description represents the help text about the configuration option.
 	Description string
-	// Flag corresponds to the CLI token with "-" prepended.
+	// Flag corresponds to the CLI token with "--" prepended.
 	Flag string
 	// Type specifies the kind of the configuration option's value.
 	Type ConfigurationOptionType
@@ -55,6 +58,8 @@ type ConfigurationOption struct {
 	Default interface{}
 }
 
+// FormatDefault() converts the default value of ConfigurationOption to string.
+// Used in the command line interface to show the argument's default value.
 func (opt ConfigurationOption) FormatDefault() string {
 	if opt.Type != StringConfigurationOption {
 		return fmt.Sprint(opt.Default)
@@ -62,7 +67,7 @@ func (opt ConfigurationOption) FormatDefault() string {
 	return fmt.Sprintf("\"%s\"", opt.Default)
 }
 
-// PipelineItem is the interface for all the units of the Git commit analysis pipeline.
+// PipelineItem is the interface for all the units in the Git commits analysis pipeline.
 type PipelineItem interface {
 	// Name returns the name of the analysis.
 	Name() string
@@ -126,14 +131,19 @@ type CommonAnalysisResult struct {
 	RunTime time.Duration
 }
 
+// BeginTimeAsTime() converts the UNIX timestamp of the beginning to Go time.
 func (car *CommonAnalysisResult) BeginTimeAsTime() time.Time {
 	return time.Unix(car.BeginTime, 0)
 }
 
+// EndTimeAsTime() converts the UNIX timestamp of the ending to Go time.
 func (car *CommonAnalysisResult) EndTimeAsTime() time.Time {
 	return time.Unix(car.EndTime, 0)
 }
 
+// Merge() combines the CommonAnalysisResult with another one.
+// We choose the earlier BeginTime, the later EndTime, sum the numbers of commits and the
+// elapsed run times.
 func (car *CommonAnalysisResult) Merge(other *CommonAnalysisResult) {
 	if car.EndTime == 0 || other.BeginTime == 0 {
 		panic("Merging with an uninitialized CommonAnalysisResult")
@@ -148,6 +158,7 @@ func (car *CommonAnalysisResult) Merge(other *CommonAnalysisResult) {
 	car.RunTime += other.RunTime
 }
 
+// FillMetadata() copies the data to a Protobuf message.
 func (car *CommonAnalysisResult) FillMetadata(meta *pb.Metadata) *pb.Metadata {
 	meta.BeginUnixTime = car.BeginTime
 	meta.EndUnixTime = car.EndTime
@@ -156,6 +167,7 @@ func (car *CommonAnalysisResult) FillMetadata(meta *pb.Metadata) *pb.Metadata {
 	return meta
 }
 
+// MetadataToCommonAnalysisResult() copies the data from a Protobuf message.
 func MetadataToCommonAnalysisResult(meta *pb.Metadata) *CommonAnalysisResult {
 	return &CommonAnalysisResult{
 		BeginTime:     meta.BeginUnixTime,
@@ -165,6 +177,8 @@ func MetadataToCommonAnalysisResult(meta *pb.Metadata) *CommonAnalysisResult {
 	}
 }
 
+// Pipeline is the core Hercules entity which carries several PipelineItems and executes them.
+// See the extended example of how a Pipeline works in doc.go.
 type Pipeline struct {
 	// OnProgress is the callback which is invoked in Analyse() to output its
 	// progress. The first argument is the number of processed commits and the
@@ -186,9 +200,13 @@ type Pipeline struct {
 }
 
 const (
+	// Makes Pipeline save the DAG to the specified file.
 	ConfigPipelineDumpPath = "Pipeline.DumpPath"
-	ConfigPipelineDryRun   = "Pipeline.DryRun"
-	FactPipelineCommits    = "commits"
+	// Disables Configure() and Initialize() invocation on each PipelineItem during the initialization.
+	// Subsequent Run() calls are going to fail. Useful with ConfigPipelineDumpPath=true.
+	ConfigPipelineDryRun = "Pipeline.DryRun"
+	// Allows specifying a custom commit chain. By default, Pipeline.Commits() is used.
+	FactPipelineCommits = "commits"
 )
 
 func NewPipeline(repository *git.Repository) *Pipeline {
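The Merge() rule above is simple enough to capture in a self-contained sketch; the struct below is a stand-in for CommonAnalysisResult, written for illustration only.

```go
package main

import (
	"fmt"
	"time"
)

// commonResult mirrors the relevant CommonAnalysisResult fields (stand-in type).
type commonResult struct {
	BeginTime, EndTime int64 // UNIX timestamps
	CommitsNumber      int
	RunTime            time.Duration
}

// merge keeps the earlier begin, the later end, and sums the counters.
func merge(a, b commonResult) commonResult {
	if b.BeginTime < a.BeginTime {
		a.BeginTime = b.BeginTime
	}
	if b.EndTime > a.EndTime {
		a.EndTime = b.EndTime
	}
	a.CommitsNumber += b.CommitsNumber
	a.RunTime += b.RunTime
	return a
}

func main() {
	r := merge(
		commonResult{BeginTime: 100, EndTime: 200, CommitsNumber: 10, RunTime: time.Second},
		commonResult{BeginTime: 50, EndTime: 150, CommitsNumber: 5, RunTime: 2 * time.Second},
	)
	fmt.Println(r.BeginTime, r.EndTime, r.CommitsNumber, r.RunTime) // 50 200 15 3s
}
```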