Vadim Markovtsev 7 років тому
батько
коміт
534edfc9eb
4 змінених файлів з 80 додано та 27 видалено
  1. 1 1
      README.md
  2. 10 0
      blob_cache.go
  3. 47 22
      burndown.go
  4. 22 4
      pipeline.go

+ 1 - 1
README.md

@@ -167,7 +167,7 @@ hercules --couples [-people-dict=/path/to/identities]
 python3 labours.py -m couples -o <name> [--couples-tmp-dir=/tmp]
 ```
 
-**Important**: it requires Tensorflow to be installed, please follow [official instuctions](https://www.tensorflow.org/install/).
+**Important**: it requires Tensorflow to be installed, please follow [official instructions](https://www.tensorflow.org/install/).
 
 The files are coupled if they are changed in the same commit. The developers are coupled if they
 change the same file. `hercules` records the number of couples throughout the whole commit history

+ 10 - 0
blob_cache.go

@@ -11,7 +11,13 @@ import (
 	"gopkg.in/src-d/go-git.v4/utils/merkletrie"
 )
 
+// This PipelineItem loads the blobs which correspond to the changed files in a commit.
+// It must provide the old and the new objects; "cache" rotates and avoids loading
+// the same blobs twice. Outdated objects are removed so "cache" never grows big.
 type BlobCache struct {
+	// Specifies how to handle the situation when we encounter a git submodule - an object without
+	// the blob. If false, we look inside .gitmodules and if we don't find it, raise an error.
+	// If true, we do not look inside .gitmodules and always succeed.
 	IgnoreMissingSubmodules bool
 
 	repository *git.Repository
@@ -119,8 +125,12 @@ func (self *BlobCache) Consume(deps map[string]interface{}) (map[string]interfac
 	return map[string]interface{}{DependencyBlobCache: cache}, nil
 }
 
+// The definition of a function which loads a git file by the specified path.
+// The state can be arbitrary though here it always corresponds to the currently processed
+// commit.
 type FileGetter func(path string) (*object.File, error)
 
+// Returns the blob which corresponds to the specified ChangeEntry.
 func (cache *BlobCache) getBlob(entry *object.ChangeEntry, fileGetter FileGetter) (
 	*object.Blob, error) {
 	blob, err := cache.repository.BlobObject(entry.TreeEntry.Hash)

+ 47 - 22
burndown.go

@@ -20,18 +20,18 @@ import (
 )
 
 // BurndownAnalyser allows to gather the line burndown statistics for a Git repository.
+// Reference: https://erikbern.com/2016/12/05/the-half-life-of-code.html
 type BurndownAnalysis struct {
 	// Granularity sets the size of each band - the number of days it spans.
 	// Smaller values provide better resolution but require more work and eat more
 	// memory. 30 days is usually enough.
 	Granularity int
 	// Sampling sets how detailed is the statistic - the size of the interval in
-	// days between consecutive measurements. It is usually a good idea to set it
-	// <= Granularity. Try 15 or 30.
+	// days between consecutive measurements. It may not be greater than Granularity. Try 15 or 30.
 	Sampling int
 
 	// TrackFiles enables or disables the fine-grained per-file burndown analysis.
-	// It does not change the top level burndown results.
+	// It does not change the project level burndown results.
 	TrackFiles bool
 
 	// The number of developers for which to collect the burndown stats. 0 disables it.
@@ -47,11 +47,11 @@ type BurndownAnalysis struct {
 	// globalStatus is the current daily alive number of lines; key is the number
 	// of days from the beginning of the history.
 	globalStatus map[int]int64
-	// globalHistory is the weekly snapshots of globalStatus.
+	// globalHistory is the periodic snapshots of globalStatus.
 	globalHistory [][]int64
-	// fileHistories is the weekly snapshots of each file's status.
+	// fileHistories is the periodic snapshots of each file's status.
 	fileHistories map[string][][]int64
-	// peopleHistories is the weekly snapshots of each person's status.
+	// peopleHistories is the periodic snapshots of each person's status.
 	peopleHistories [][][]int64
 	// files is the mapping <file path> -> *File.
 	files map[string]*File
@@ -68,21 +68,44 @@ type BurndownAnalysis struct {
 	reversedPeopleDict []string
 }
 
+// Carries the result of running BurndownAnalysis - it is returned by BurndownAnalysis.Finalize().
 type BurndownResult struct {
-	GlobalHistory      [][]int64
-	FileHistories      map[string][][]int64
-	PeopleHistories    [][][]int64
-	PeopleMatrix       [][]int64
+	// [number of samples][number of bands]
+	// The number of samples depends on Sampling: the less Sampling, the bigger the number.
+	// The number of bands depends on Granularity: the less Granularity, the bigger the number.
+	GlobalHistory [][]int64
+	// The key is the path inside the Git repository. The value's dimensions are the same as
+	// in GlobalHistory.
+	FileHistories map[string][][]int64
+	// [number of people][number of samples][number of bands]
+	PeopleHistories [][][]int64
+	// [number of people][number of people + 2]
+	// The first element is the total number of lines added by the author.
+	// The second element is the number of removals by unidentified authors (outside reversedPeopleDict).
+	// The rest of the elements are equal to the number of line removals by the corresponding
+	// authors in reversedPeopleDict: 2 -> 0, 3 -> 1, etc.
+	PeopleMatrix [][]int64
+
+	// The following members are private.
+
+	// reversedPeopleDict is borrowed from IdentityDetector and becomes available after
+	// Pipeline.Initialize(facts map[string]interface{}). Thus it can be obtained via
+	// facts[FactIdentityDetectorReversedPeopleDict].
 	reversedPeopleDict []string
-	sampling           int
-	granularity        int
+	// sampling and granularity are copied from BurndownAnalysis and stored for service purposes
+	// such as merging several results together.
+	sampling    int
+	granularity int
 }
 
 const (
-	ConfigBurndownGranularity  = "Burndown.Granularity"
-	ConfigBurndownSampling     = "Burndown.Sampling"
-	ConfigBurndownTrackFiles   = "Burndown.TrackFiles"
-	ConfigBurndownTrackPeople  = "Burndown.TrackPeople"
+	ConfigBurndownGranularity = "Burndown.Granularity"
+	ConfigBurndownSampling    = "Burndown.Sampling"
+	// Measuring individual files is optional and false by default.
+	ConfigBurndownTrackFiles = "Burndown.TrackFiles"
+	// Measuring authors is optional and false by default.
+	ConfigBurndownTrackPeople = "Burndown.TrackPeople"
+	// Enables some extra debug assertions.
 	ConfigBurndownDebug        = "Burndown.Debug"
 	DefaultBurndownGranularity = 30
 )
@@ -221,12 +244,7 @@ func (analyser *BurndownAnalysis) Consume(deps map[string]interface{}) (map[stri
 	return nil, nil
 }
 
-// Finalize() returns the list of snapshots of the cumulative line edit times
-// and the similar lists for every file which is alive in HEAD.
-// The number of snapshots (the first dimension >[]<[]int64) depends on
-// Analyser.Sampling (the more Sampling, the less the value); the length of
-// each snapshot depends on Analyser.Granularity (the more Granularity,
-// the less the value).
+// Finalize() returns BurndownResult.
 func (analyser *BurndownAnalysis) Finalize() interface{} {
 	gs, fss, pss := analyser.groupStatus()
 	analyser.updateHistories(gs, fss, pss, 1)
@@ -446,6 +464,9 @@ func (analyser *BurndownAnalysis) MergeResults(
 	return merged
 }
 
+// mergeMatrices takes two [number of samples][number of bands] matrices,
+// resamples them to days so that they become square, sums and resamples back to the
+// least of (sampling1, sampling2) and (granularity1, granularity2).
 func mergeMatrices(m1, m2 [][]int64, granularity1, sampling1, granularity2, sampling2 int,
 	c1, c2 *CommonAnalysisResult) [][]int64 {
 	commonMerged := *c1
@@ -735,6 +756,10 @@ func checkClose(c io.Closer) {
 	}
 }
 
+// We do a hack and store the day in the first 14 bits and the author index in the last 18.
+// Strictly speaking, int can be 64-bit and then the author index occupies 32+18 bits.
+// This hack is needed to simplify the values storage inside File-s. We can compare
+// different values together and they are compared as days for the same author.
 func (analyser *BurndownAnalysis) packPersonWithDay(person int, day int) int {
 	if analyser.PeopleNumber == 0 {
 		return day

+ 22 - 4
pipeline.go

@@ -18,6 +18,7 @@ import (
 	"gopkg.in/src-d/hercules.v3/toposort"
 )
 
+// ConfigurationOptionType represents the possible types of a ConfigurationOption's value.
 type ConfigurationOptionType int
 
 const (
@@ -29,6 +30,8 @@ const (
 	StringConfigurationOption
 )
 
+// String() returns an empty string for the boolean type, "int" for integers and "string" for
+// strings. It is used in the command line interface to show the argument's type.
 func (opt ConfigurationOptionType) String() string {
 	switch opt {
 	case BoolConfigurationOption:
@@ -47,7 +50,7 @@ type ConfigurationOption struct {
 	Name string
 	// Description represents the help text about the configuration option.
 	Description string
-	// Flag corresponds to the CLI token with "-" prepended.
+	// Flag corresponds to the CLI token with "--" prepended.
 	Flag string
 	// Type specifies the kind of the configuration option's value.
 	Type ConfigurationOptionType
@@ -55,6 +58,8 @@ type ConfigurationOption struct {
 	Default interface{}
 }
 
+// FormatDefault() converts the default value of ConfigurationOption to string.
+// Used in the command line interface to show the argument's default value.
 func (opt ConfigurationOption) FormatDefault() string {
 	if opt.Type != StringConfigurationOption {
 		return fmt.Sprint(opt.Default)
@@ -62,7 +67,7 @@ func (opt ConfigurationOption) FormatDefault() string {
 	return fmt.Sprintf("\"%s\"", opt.Default)
 }
 
-// PipelineItem is the interface for all the units of the Git commit analysis pipeline.
+// PipelineItem is the interface for all the units in the Git commits analysis pipeline.
 type PipelineItem interface {
 	// Name returns the name of the analysis.
 	Name() string
@@ -126,14 +131,19 @@ type CommonAnalysisResult struct {
 	RunTime time.Duration
 }
 
+// BeginTimeAsTime() converts the UNIX timestamp of the beginning to Go time.
 func (car *CommonAnalysisResult) BeginTimeAsTime() time.Time {
 	return time.Unix(car.BeginTime, 0)
 }
 
+// EndTimeAsTime() converts the UNIX timestamp of the ending to Go time.
 func (car *CommonAnalysisResult) EndTimeAsTime() time.Time {
 	return time.Unix(car.EndTime, 0)
 }
 
+// Merge() combines the CommonAnalysisResult with another one.
+// We choose the earlier BeginTime, the later EndTime, sum the number of commits and the
+// elapsed run times.
 func (car *CommonAnalysisResult) Merge(other *CommonAnalysisResult) {
 	if car.EndTime == 0 || other.BeginTime == 0 {
 		panic("Merging with an uninitialized CommonAnalysisResult")
@@ -148,6 +158,7 @@ func (car *CommonAnalysisResult) Merge(other *CommonAnalysisResult) {
 	car.RunTime += other.RunTime
 }
 
+// FillMetadata() copies the data to a Protobuf message.
 func (car *CommonAnalysisResult) FillMetadata(meta *pb.Metadata) *pb.Metadata {
 	meta.BeginUnixTime = car.BeginTime
 	meta.EndUnixTime = car.EndTime
@@ -156,6 +167,7 @@ func (car *CommonAnalysisResult) FillMetadata(meta *pb.Metadata) *pb.Metadata {
 	return meta
 }
 
+// MetadataToCommonAnalysisResult() copies the data from a Protobuf message.
 func MetadataToCommonAnalysisResult(meta *pb.Metadata) *CommonAnalysisResult {
 	return &CommonAnalysisResult{
 		BeginTime:     meta.BeginUnixTime,
@@ -165,6 +177,8 @@ func MetadataToCommonAnalysisResult(meta *pb.Metadata) *CommonAnalysisResult {
 	}
 }
 
+// The core Hercules entity which carries several PipelineItems and executes them.
+// See the extended example of how a Pipeline works in doc.go.
 type Pipeline struct {
 	// OnProgress is the callback which is invoked in Analyse() to output it's
 	// progress. The first argument is the number of processed commits and the
@@ -186,9 +200,13 @@ type Pipeline struct {
 }
 
 const (
+	// Makes Pipeline to save the DAG to the specified file.
 	ConfigPipelineDumpPath = "Pipeline.DumpPath"
-	ConfigPipelineDryRun   = "Pipeline.DryRun"
-	FactPipelineCommits    = "commits"
+	// Disables Configure() and Initialize() invocation on each PipelineItem during initialization.
+	// Subsequent Run() calls are going to fail. Useful together with ConfigPipelineDumpPath.
+	ConfigPipelineDryRun = "Pipeline.DryRun"
+	// Allows to specify the custom commit chain. By default, Pipeline.Commits() is used.
+	FactPipelineCommits = "commits"
 )
 
 func NewPipeline(repository *git.Repository) *Pipeline {