
Add some docs

Vadim Markovtsev, 7 years ago
commit 534edfc9eb
4 changed files with 80 additions and 27 deletions
  1. README.md (+1, -1)
  2. blob_cache.go (+10, -0)
  3. burndown.go (+47, -22)
  4. pipeline.go (+22, -4)

README.md (+1, -1)

@@ -167,7 +167,7 @@ hercules --couples [-people-dict=/path/to/identities]
 python3 labours.py -m couples -o <name> [--couples-tmp-dir=/tmp]
 ```
 
-**Important**: it requires Tensorflow to be installed, please follow [official instuctions](https://www.tensorflow.org/install/).
+**Important**: it requires Tensorflow to be installed, please follow [official instructions](https://www.tensorflow.org/install/).
 
 The files are coupled if they are changed in the same commit. The developers are coupled if they
 change the same file. `hercules` records the number of couples throughout the whole commit history

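To make the coupling rule concrete, here is a minimal, hypothetical Go sketch (not the hercules implementation) that counts how often each pair of files is changed in the same commit; the toy history and file names are invented for illustration.

```go
package main

import "fmt"

func main() {
	// Toy history: each entry lists the files changed in one commit (made-up data).
	commits := [][]string{
		{"burndown.go", "pipeline.go"},
		{"burndown.go", "pipeline.go", "README.md"},
		{"blob_cache.go", "pipeline.go"},
	}
	// Count co-occurrences; assumes each commit's file list is consistently ordered
	// so that the pair key is stable.
	couples := map[[2]string]int{}
	for _, files := range commits {
		for i := 0; i < len(files); i++ {
			for j := i + 1; j < len(files); j++ {
				couples[[2]string{files[i], files[j]}]++
			}
		}
	}
	// burndown.go and pipeline.go were changed together in two commits.
	fmt.Println(couples[[2]string{"burndown.go", "pipeline.go"}]) // 2
}
```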
blob_cache.go (+10, -0)

@@ -11,7 +11,13 @@ import (
 	"gopkg.in/src-d/go-git.v4/utils/merkletrie"
 )
 
+// BlobCache is the PipelineItem which loads the blobs corresponding to the changed files
+// in a commit. It provides both the old and the new objects; the cache rotates so that
+// the same blobs are never loaded twice, and outdated objects are evicted so it never grows big.
 type BlobCache struct {
+	// IgnoreMissingSubmodules specifies how to handle a git submodule - an object without
+	// the blob. If false, we look inside .gitmodules and raise an error if the submodule
+	// is not listed there. If true, we do not check .gitmodules and always succeed.
 	IgnoreMissingSubmodules bool
 
 	repository *git.Repository
@@ -119,8 +125,12 @@ func (self *BlobCache) Consume(deps map[string]interface{}) (map[string]interfac
 	return map[string]interface{}{DependencyBlobCache: cache}, nil
 }
 
+// FileGetter is the type of a function which loads a Git file by the specified path.
+// The state can be arbitrary, though here it always corresponds to the currently
+// processed commit.
 type FileGetter func(path string) (*object.File, error)
 
+// getBlob returns the blob which corresponds to the specified ChangeEntry.
 func (cache *BlobCache) getBlob(entry *object.ChangeEntry, fileGetter FileGetter) (
 	*object.Blob, error) {
 	blob, err := cache.repository.BlobObject(entry.TreeEntry.Hash)

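As a usage sketch, a FileGetter bound to the currently processed commit can be built from go-git's object.Commit.File, whose signature matches. This is a hedged illustration: makeFileGetter is a made-up helper, not part of this diff.

```go
package example

import (
	"gopkg.in/src-d/go-git.v4/plumbing/object"
)

// FileGetter mirrors the type declared in blob_cache.go above.
type FileGetter func(path string) (*object.File, error)

// makeFileGetter adapts a commit into a FileGetter: the returned closure
// loads a file from that commit's tree by its path.
func makeFileGetter(commit *object.Commit) FileGetter {
	return func(path string) (*object.File, error) {
		return commit.File(path)
	}
}
```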
burndown.go (+47, -22)

@@ -20,18 +20,18 @@ import (
 )
 
 // BurndownAnalyser allows to gather the line burndown statistics for a Git repository.
+// Reference: https://erikbern.com/2016/12/05/the-half-life-of-code.html
 type BurndownAnalysis struct {
 	// Granularity sets the size of each band - the number of days it spans.
 	// Smaller values provide better resolution but require more work and eat more
 	// memory. 30 days is usually enough.
 	Granularity int
 	// Sampling sets how detailed is the statistic - the size of the interval in
-	// days between consecutive measurements. It is usually a good idea to set it
-	// <= Granularity. Try 15 or 30.
+	// days between consecutive measurements. It may not be greater than Granularity. Try 15 or 30.
 	Sampling int
 
 	// TrackFiles enables or disables the fine-grained per-file burndown analysis.
-	// It does not change the top level burndown results.
+	// It does not change the project level burndown results.
 	TrackFiles bool
 
 	// The number of developers for which to collect the burndown stats. 0 disables it.
@@ -47,11 +47,11 @@ type BurndownAnalysis struct {
 	// globalStatus is the current daily alive number of lines; key is the number
 	// of days from the beginning of the history.
 	globalStatus map[int]int64
-	// globalHistory is the weekly snapshots of globalStatus.
+	// globalHistory is the periodic snapshots of globalStatus.
 	globalHistory [][]int64
-	// fileHistories is the weekly snapshots of each file's status.
+	// fileHistories is the periodic snapshots of each file's status.
 	fileHistories map[string][][]int64
-	// peopleHistories is the weekly snapshots of each person's status.
+	// peopleHistories is the periodic snapshots of each person's status.
 	peopleHistories [][][]int64
 	// files is the mapping <file path> -> *File.
 	files map[string]*File
@@ -68,21 +68,44 @@ type BurndownAnalysis struct {
 	reversedPeopleDict []string
 }
 
+// BurndownResult carries the result of running BurndownAnalysis - it is returned by BurndownAnalysis.Finalize().
 type BurndownResult struct {
-	GlobalHistory      [][]int64
-	FileHistories      map[string][][]int64
-	PeopleHistories    [][][]int64
-	PeopleMatrix       [][]int64
+	// [number of samples][number of bands]
+	// The number of samples depends on Sampling: the smaller the Sampling, the bigger the number.
+	// The number of bands depends on Granularity: the smaller the Granularity, the bigger the number.
+	GlobalHistory [][]int64
+	// The key is the path inside the Git repository. The value's dimensions are the same as
+	// in GlobalHistory.
+	FileHistories map[string][][]int64
+	// [number of people][number of samples][number of bands]
+	PeopleHistories [][][]int64
+	// [number of people][number of people + 2]
+	// The first element is the total number of lines added by the author.
+	// The second element is the number of removals by unidentified authors (outside reversedPeopleDict).
+	// The rest of the elements equal the number of line removals by the corresponding
+	// authors in reversedPeopleDict: 2 -> 0, 3 -> 1, etc.
+	PeopleMatrix [][]int64
+
+	// The following members are private.
+
+	// reversedPeopleDict is borrowed from IdentityDetector and becomes available after
+	// Pipeline.Initialize(facts map[string]interface{}). Thus it can be obtained via
+	// facts[FactIdentityDetectorReversedPeopleDict].
 	reversedPeopleDict []string
-	sampling           int
-	granularity        int
+	// sampling and granularity are copied from BurndownAnalysis and stored for service purposes
+	// such as merging several results together.
+	sampling    int
+	granularity int
 }
 
 const (
-	ConfigBurndownGranularity  = "Burndown.Granularity"
-	ConfigBurndownSampling     = "Burndown.Sampling"
-	ConfigBurndownTrackFiles   = "Burndown.TrackFiles"
-	ConfigBurndownTrackPeople  = "Burndown.TrackPeople"
+	ConfigBurndownGranularity = "Burndown.Granularity"
+	ConfigBurndownSampling    = "Burndown.Sampling"
+	// Measuring individual files is optional and false by default.
+	ConfigBurndownTrackFiles = "Burndown.TrackFiles"
+	// Measuring authors is optional and false by default.
+	ConfigBurndownTrackPeople = "Burndown.TrackPeople"
+	// Enables some extra debug assertions.
 	ConfigBurndownDebug        = "Burndown.Debug"
 	DefaultBurndownGranularity = 30
 )
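As a rough, hypothetical illustration of those dimensions (hercules' exact rounding and boundary handling may differ): with about 360 days of recorded history, Sampling=30 yields roughly 12 samples and Granularity=30 roughly 12 bands.

```go
package main

import "fmt"

func main() {
	// Assumed example values - not taken from a real run.
	days := 360       // length of the recorded history in days
	sampling := 30    // interval in days between snapshots (rows)
	granularity := 30 // width in days of each age band (columns)
	samples := days / sampling
	bands := days / granularity
	fmt.Printf("GlobalHistory is roughly [%d][%d]int64\n", samples, bands) // [12][12]
}
```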
@@ -221,12 +244,7 @@ func (analyser *BurndownAnalysis) Consume(deps map[string]interface{}) (map[stri
 	return nil, nil
 }
 
-// Finalize() returns the list of snapshots of the cumulative line edit times
-// and the similar lists for every file which is alive in HEAD.
-// The number of snapshots (the first dimension >[]<[]int64) depends on
-// Analyser.Sampling (the more Sampling, the less the value); the length of
-// each snapshot depends on Analyser.Granularity (the more Granularity,
-// the less the value).
+// Finalize() returns BurndownResult.
 func (analyser *BurndownAnalysis) Finalize() interface{} {
 	gs, fss, pss := analyser.groupStatus()
 	analyser.updateHistories(gs, fss, pss, 1)
@@ -446,6 +464,9 @@ func (analyser *BurndownAnalysis) MergeResults(
 	return merged
 }
 
+// mergeMatrices takes two [number of samples][number of bands] matrices,
+// resamples them to days so that they become square, sums and resamples back to the
+// least of (sampling1, sampling2) and (granularity1, granularity2).
 func mergeMatrices(m1, m2 [][]int64, granularity1, sampling1, granularity2, sampling2 int,
 	c1, c2 *CommonAnalysisResult) [][]int64 {
 	commonMerged := *c1
@@ -735,6 +756,10 @@ func checkClose(c io.Closer) {
 	}
 }
 
+// We do a hack and store the day in the first 14 bits and the author index in the last 18.
+// Strictly speaking, int can be 64-bit and then the author index occupies 32+18 bits.
+// This hack is needed to simplify the storage of values inside File-s: different values
+// can be compared directly, and for the same author they compare as days.
 func (analyser *BurndownAnalysis) packPersonWithDay(person int, day int) int {
 	if analyser.PeopleNumber == 0 {
 		return day

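A minimal sketch of that packing scheme - the day in the low 14 bits, the author index above them - written from scratch for illustration (the pack/unpack helper names are invented, not hercules functions):

```go
package main

import "fmt"

const dayBits = 14 // assumption taken from the comment above

func pack(person, day int) int { return person<<dayBits | day }
func unpackPerson(v int) int   { return v >> dayBits }
func unpackDay(v int) int      { return v & (1<<dayBits - 1) }

func main() {
	v := pack(3, 100)
	fmt.Println(unpackPerson(v), unpackDay(v)) // 3 100
	// For the same author, packed values order exactly like days:
	fmt.Println(pack(3, 100) < pack(3, 200)) // true
}
```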
pipeline.go (+22, -4)

@@ -18,6 +18,7 @@ import (
 	"gopkg.in/src-d/hercules.v3/toposort"
 )
 
+// ConfigurationOptionType represents the possible types of a ConfigurationOption's value.
 type ConfigurationOptionType int
 
 const (
@@ -29,6 +30,8 @@ const (
 	StringConfigurationOption
 )
 
+// String() returns an empty string for the boolean type, "int" for integers and "string" for
+// strings. It is used in the command line interface to show the argument's type.
 func (opt ConfigurationOptionType) String() string {
 	switch opt {
 	case BoolConfigurationOption:
@@ -47,7 +50,7 @@ type ConfigurationOption struct {
 	Name string
 	// Description represents the help text about the configuration option.
 	Description string
-	// Flag corresponds to the CLI token with "-" prepended.
+	// Flag corresponds to the CLI token with "--" prepended.
 	Flag string
 	// Type specifies the kind of the configuration option's value.
 	Type ConfigurationOptionType
@@ -55,6 +58,8 @@ type ConfigurationOption struct {
 	Default interface{}
 }
 
+// FormatDefault() converts the default value of ConfigurationOption to string.
+// Used in the command line interface to show the argument's default value.
 func (opt ConfigurationOption) FormatDefault() string {
 	if opt.Type != StringConfigurationOption {
 		return fmt.Sprint(opt.Default)
@@ -62,7 +67,7 @@ func (opt ConfigurationOption) FormatDefault() string {
 	return fmt.Sprintf("\"%s\"", opt.Default)
 }
 
-// PipelineItem is the interface for all the units of the Git commit analysis pipeline.
+// PipelineItem is the interface for all the units in the Git commits analysis pipeline.
 type PipelineItem interface {
 	// Name returns the name of the analysis.
 	Name() string
@@ -126,14 +131,19 @@ type CommonAnalysisResult struct {
 	RunTime time.Duration
 }
 
+// BeginTimeAsTime() converts the UNIX timestamp of the beginning to Go time.
 func (car *CommonAnalysisResult) BeginTimeAsTime() time.Time {
 	return time.Unix(car.BeginTime, 0)
 }
 
+// EndTimeAsTime() converts the UNIX timestamp of the ending to Go time.
 func (car *CommonAnalysisResult) EndTimeAsTime() time.Time {
 	return time.Unix(car.EndTime, 0)
 }
 
+// Merge() combines the CommonAnalysisResult with another one.
+// We choose the earlier BeginTime, the later EndTime, sum the numbers of commits and the
+// elapsed run times.
 func (car *CommonAnalysisResult) Merge(other *CommonAnalysisResult) {
 	if car.EndTime == 0 || other.BeginTime == 0 {
 		panic("Merging with an uninitialized CommonAnalysisResult")
@@ -148,6 +158,7 @@ func (car *CommonAnalysisResult) Merge(other *CommonAnalysisResult) {
 	car.RunTime += other.RunTime
 }
 
+// FillMetadata() copies the data to a Protobuf message.
 func (car *CommonAnalysisResult) FillMetadata(meta *pb.Metadata) *pb.Metadata {
 	meta.BeginUnixTime = car.BeginTime
 	meta.EndUnixTime = car.EndTime
@@ -156,6 +167,7 @@ func (car *CommonAnalysisResult) FillMetadata(meta *pb.Metadata) *pb.Metadata {
 	return meta
 }
 
+// MetadataToCommonAnalysisResult() copies the data from a Protobuf message.
 func MetadataToCommonAnalysisResult(meta *pb.Metadata) *CommonAnalysisResult {
 	return &CommonAnalysisResult{
 		BeginTime:     meta.BeginUnixTime,
@@ -165,6 +177,8 @@ func MetadataToCommonAnalysisResult(meta *pb.Metadata) *CommonAnalysisResult {
 	}
 }
 
+// Pipeline is the core Hercules entity which carries several PipelineItems and executes them.
+// See the extended example of how a Pipeline works in doc.go.
 type Pipeline struct {
 	// OnProgress is the callback which is invoked in Analyse() to output its
 	// progress. The first argument is the number of processed commits and the
@@ -186,9 +200,13 @@ type Pipeline struct {
 }
 
 const (
+	// Makes Pipeline save the DAG to the specified file.
 	ConfigPipelineDumpPath = "Pipeline.DumpPath"
-	ConfigPipelineDryRun   = "Pipeline.DryRun"
-	FactPipelineCommits    = "commits"
+	// Disables Configure() and Initialize() invocation on each PipelineItem during the initialization.
+	// Subsequent Run() calls are going to fail. Useful with ConfigPipelineDumpPath=true.
+	ConfigPipelineDryRun = "Pipeline.DryRun"
+	// Allows specifying a custom commit chain. By default, Pipeline.Commits() is used.
+	FactPipelineCommits = "commits"
 )
 
 func NewPipeline(repository *git.Repository) *Pipeline {
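The Merge() rule above is simple enough to capture in a self-contained sketch; the struct below is a stand-in for CommonAnalysisResult, written for illustration only.

```go
package main

import (
	"fmt"
	"time"
)

// commonResult mirrors the relevant CommonAnalysisResult fields (stand-in type).
type commonResult struct {
	BeginTime, EndTime int64 // UNIX timestamps
	CommitsNumber      int
	RunTime            time.Duration
}

// merge keeps the earlier begin, the later end, and sums the counters.
func merge(a, b commonResult) commonResult {
	if b.BeginTime < a.BeginTime {
		a.BeginTime = b.BeginTime
	}
	if b.EndTime > a.EndTime {
		a.EndTime = b.EndTime
	}
	a.CommitsNumber += b.CommitsNumber
	a.RunTime += b.RunTime
	return a
}

func main() {
	r := merge(
		commonResult{BeginTime: 100, EndTime: 200, CommitsNumber: 10, RunTime: time.Second},
		commonResult{BeginTime: 50, EndTime: 150, CommitsNumber: 5, RunTime: 2 * time.Second},
	)
	fmt.Println(r.BeginTime, r.EndTime, r.CommitsNumber, r.RunTime) // 50 200 15 3s
}
```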