Browse Source

Add LinesStatsCalculator

Signed-off-by: Vadim Markovtsev <vadim@sourced.tech>
Vadim Markovtsev 6 years ago
parent
commit
12832b8674
2 changed files with 291 additions and 0 deletions
  1. 161 0
      internal/plumbing/line_stats.go
  2. 130 0
      internal/plumbing/line_stats_test.go

+ 161 - 0
internal/plumbing/line_stats.go

@@ -0,0 +1,161 @@
+package plumbing
+
+import (
+	"unicode/utf8"
+
+	"github.com/sergi/go-diff/diffmatchpatch"
+	"gopkg.in/src-d/go-git.v4"
+	"gopkg.in/src-d/go-git.v4/plumbing"
+	"gopkg.in/src-d/go-git.v4/plumbing/object"
+	"gopkg.in/src-d/go-git.v4/utils/merkletrie"
+	"gopkg.in/src-d/hercules.v8/internal/core"
+)
+
+// LinesStatsCalculator is the PipelineItem which measures the added, removed and changed lines for each text file in the commit.
+type LinesStatsCalculator struct {
+	core.NoopMerger // embedded; NOTE(review): presumably supplies a no-op Merge() - confirm in core
+}
+
+// LineStats holds the numbers of inserted, deleted and changed lines of a single file in a single commit.
+type LineStats struct {
+	// Added is the number of lines which the commit added to the file.
+	Added int
+	// Removed is the number of lines which the commit removed from the file.
+	Removed int
+	// Changed is the number of lines which the commit replaced in the file (a removal paired with an insertion).
+	Changed int
+}
+
+const (
+	// DependencyLineStats is the identifier of the data provided by LinesStatsCalculator - line
+	// statistics for each file in the commit, stored as map[object.ChangeEntry]LineStats.
+	DependencyLineStats = "line_stats"
+)
+
+// Name returns the name of this PipelineItem. It uniquely identifies the type and is used for mapping keys, etc.
+func (lsc *LinesStatsCalculator) Name() string {
+	return "LinesStats"
+}
+
+// Provides lists the names of the entities which this PipelineItem produces.
+// Every produced entity is inserted into `deps` of the dependent Consume()-s according
+// to this list. core.Registry also uses it to build the global map of providers.
+// The only entity produced here is DependencyLineStats.
+func (lsc *LinesStatsCalculator) Provides() []string {
+	return []string{DependencyLineStats}
+}
+
+// Requires lists the names of the entities which this PipelineItem needs.
+// Every requested entity is inserted into `deps` of Consume(). Those entities are,
+// in turn, listed by Provides() of the upstream items.
+// The order is: tree changes, blob cache, file diffs.
+func (lsc *LinesStatsCalculator) Requires() []string {
+	return []string{
+		DependencyTreeChanges,
+		DependencyBlobCache,
+		DependencyFileDiff,
+	}
+}
+
+// ListConfigurationOptions returns the list of changeable public properties of this PipelineItem; there are none, hence nil.
+func (lsc *LinesStatsCalculator) ListConfigurationOptions() []core.ConfigurationOption {
+	return nil
+}
+
+// Configure sets the properties previously published by ListConfigurationOptions(); `facts` is not read and nil is always returned.
+func (lsc *LinesStatsCalculator) Configure(facts map[string]interface{}) error {
+	return nil
+}
+
+// Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
+// calls. Nothing needs to be prepared here: `repository` is not used and nil is always returned.
+func (lsc *LinesStatsCalculator) Initialize(repository *git.Repository) error {
+	return nil
+}
+
+// Consume runs this PipelineItem on the next commit data.
+// `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
+// Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
+// This function returns the mapping with analysis results. The keys must be the same as
+// in Provides(). If there was an error, nil is returned.
+//
+// The single result, DependencyLineStats, is a map[object.ChangeEntry]LineStats:
+//   - inserted files are keyed by change.To and all their lines count as added;
+//   - deleted files are keyed by change.From and all their lines count as removed;
+//   - modified files are keyed by change.To and are measured from their DependencyFileDiff entry.
+//
+// Inserted and deleted files whose lines cannot be counted are skipped. Merge commits yield
+// an empty map. The only error which is returned comes from change.Action().
+func (lsc *LinesStatsCalculator) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
+	result := map[object.ChangeEntry]LineStats{}
+	if deps[core.DependencyIsMerge].(bool) {
+		// we ignore merge commit diffs
+		// TODO(vmarkovtsev): handle them better
+		return map[string]interface{}{DependencyLineStats: result}, nil
+	}
+	treeDiff := deps[DependencyTreeChanges].(object.Changes)
+	cache := deps[DependencyBlobCache].(map[plumbing.Hash]*CachedBlob)
+	fileDiffs := deps[DependencyFileDiff].(map[string]FileDiffData)
+	for _, change := range treeDiff {
+		action, err := change.Action()
+		if err != nil {
+			return nil, err
+		}
+		switch action {
+		case merkletrie.Insert:
+			lines, err := cache[change.To.TreeEntry.Hash].CountLines()
+			if err != nil {
+				// binary
+				continue
+			}
+			result[change.To] = LineStats{Added: lines}
+		case merkletrie.Delete:
+			lines, err := cache[change.From.TreeEntry.Hash].CountLines()
+			if err != nil {
+				// binary
+				continue
+			}
+			result[change.From] = LineStats{Removed: lines}
+		case merkletrie.Modify:
+			// A name which is missing from fileDiffs yields the zero FileDiffData and
+			// therefore all-zero statistics; the entry is still recorded.
+			result[change.To] = countModifiedLines(fileDiffs[change.To.Name])
+		}
+	}
+	return map[string]interface{}{DependencyLineStats: result}, nil
+}
+
+// countModifiedLines converts the edit script of a modified file into LineStats.
+// A deletion which is directly followed by an insertion is counted as "changed" up to the
+// shorter of the two lengths; the surplus is counted as removed or added respectively.
+// A deletion which is not followed by an insertion is counted as removed in full.
+// Edit lengths are measured in runes - presumably each rune encodes one line of the file
+// (line-mode diff); TODO confirm against the provider of DependencyFileDiff.
+func countModifiedLines(fileDiff FileDiffData) LineStats {
+	var stats LineStats
+	// removedPending is the size of the deletion which has not been attributed yet.
+	removedPending := 0
+	for _, edit := range fileDiff.Diffs {
+		switch edit.Type {
+		case diffmatchpatch.DiffEqual:
+			stats.Removed += removedPending
+			removedPending = 0
+		case diffmatchpatch.DiffInsert:
+			delta := utf8.RuneCountInString(edit.Text)
+			if removedPending > delta {
+				stats.Changed += delta
+				stats.Removed += removedPending - delta
+			} else {
+				stats.Changed += removedPending
+				stats.Added += delta - removedPending
+			}
+			removedPending = 0
+		case diffmatchpatch.DiffDelete:
+			// Accumulate instead of overwriting so that adjacent deletions are not lost
+			// if the edit script has not been merged.
+			removedPending += utf8.RuneCountInString(edit.Text)
+		}
+	}
+	// A trailing deletion has no insertion to pair with.
+	stats.Removed += removedPending
+	return stats
+}
+
+// Fork clones this PipelineItem `n` times by delegating to core.ForkSamePipelineItem().
+func (lsc *LinesStatsCalculator) Fork(n int) []core.PipelineItem {
+	return core.ForkSamePipelineItem(lsc, n)
+}
+
+func init() {
+	core.Registry.Register(&LinesStatsCalculator{}) // registers the item in core.Registry at package load
+}

+ 130 - 0
internal/plumbing/line_stats_test.go

@@ -0,0 +1,130 @@
+package plumbing_test
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"gopkg.in/src-d/go-git.v4/plumbing"
+	"gopkg.in/src-d/go-git.v4/plumbing/object"
+	"gopkg.in/src-d/hercules.v8/internal/core"
+	items "gopkg.in/src-d/hercules.v8/internal/plumbing"
+	"gopkg.in/src-d/hercules.v8/internal/plumbing/identity"
+	"gopkg.in/src-d/hercules.v8/internal/test"
+	"gopkg.in/src-d/hercules.v8/internal/test/fixtures"
+)
+
+// TestLinesStatsMeta checks the static metadata of LinesStatsCalculator: its name, the provided
+// and required entities, the absence of configuration options and the result of Fork().
+func TestLinesStatsMeta(t *testing.T) {
+	ra := &items.LinesStatsCalculator{}
+	// testify expects (expected, actual); the order matters for the failure messages.
+	assert.Equal(t, "LinesStats", ra.Name())
+	assert.Equal(t, []string{items.DependencyLineStats}, ra.Provides())
+	assert.Equal(t, []string{
+		items.DependencyTreeChanges,
+		items.DependencyBlobCache,
+		items.DependencyFileDiff,
+	}, ra.Requires())
+	assert.Nil(t, ra.ListConfigurationOptions())
+	assert.NoError(t, ra.Configure(nil))
+	for _, f := range ra.Fork(10) {
+		assert.Equal(t, ra, f)
+	}
+}
+
+// TestLinesStatsRegistration checks that LinesStatsCalculator can be summoned from core.Registry
+// both by its own name and by the name of the entity it provides.
+func TestLinesStatsRegistration(t *testing.T) {
+	summoned := core.Registry.Summon((&items.LinesStatsCalculator{}).Name())
+	// Index only if the length check passed; otherwise the test would panic instead of failing.
+	if assert.Len(t, summoned, 1) {
+		assert.Equal(t, "LinesStats", summoned[0].Name())
+	}
+	summoned = core.Registry.Summon((&items.LinesStatsCalculator{}).Provides()[0])
+	assert.NotEmpty(t, summoned)
+	matched := false
+	for _, tp := range summoned {
+		if tp.Name() == "LinesStats" {
+			matched = true
+			break
+		}
+	}
+	assert.True(t, matched)
+}
+
+// TestLinesStatsConsume feeds LinesStatsCalculator.Consume() with one modified, one inserted and
+// one deleted file from the test repository and checks the resulting per-file LineStats.
+func TestLinesStatsConsume(t *testing.T) {
+	deps := map[string]interface{}{}
+
+	// stage 1
+	// NOTE(review): presumably neither item under test reads the author - confirm before removing.
+	deps[identity.DependencyAuthor] = 0
+	cache := map[plumbing.Hash]*items.CachedBlob{}
+	items.AddHash(t, cache, "291286b4ac41952cbd1389fda66420ec03c1a9fe")
+	items.AddHash(t, cache, "c29112dbd697ad9b401333b80c18a63951bc18d9")
+	items.AddHash(t, cache, "baa64828831d174f40140e4b3cfa77d1e917a2c1")
+	items.AddHash(t, cache, "dc248ba2b22048cc730c571a748e8ffcf7085ab9")
+	deps[items.DependencyBlobCache] = cache
+	changes := make(object.Changes, 3)
+	treeFrom, err := test.Repository.TreeObject(plumbing.NewHash(
+		"a1eb2ea76eb7f9bfbde9b243861474421000eb96"))
+	if err != nil {
+		t.Fatalf("loading the source tree: %v", err)
+	}
+	treeTo, err := test.Repository.TreeObject(plumbing.NewHash(
+		"994eac1cd07235bb9815e547a75c84265dea00f5"))
+	if err != nil {
+		t.Fatalf("loading the destination tree: %v", err)
+	}
+	// modification (with a rename): analyser.go -> analyser2.go
+	changes[0] = &object.Change{From: object.ChangeEntry{
+		Name: "analyser.go",
+		Tree: treeFrom,
+		TreeEntry: object.TreeEntry{
+			Name: "analyser.go",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("dc248ba2b22048cc730c571a748e8ffcf7085ab9"),
+		},
+	}, To: object.ChangeEntry{
+		Name: "analyser2.go",
+		Tree: treeTo,
+		TreeEntry: object.TreeEntry{
+			Name: "analyser2.go",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("baa64828831d174f40140e4b3cfa77d1e917a2c1"),
+		},
+	}}
+	// insertion: cmd/hercules/main.go
+	changes[1] = &object.Change{From: object.ChangeEntry{}, To: object.ChangeEntry{
+		Name: "cmd/hercules/main.go",
+		Tree: treeTo,
+		TreeEntry: object.TreeEntry{
+			Name: "cmd/hercules/main.go",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("c29112dbd697ad9b401333b80c18a63951bc18d9"),
+		},
+	},
+	}
+	// deletion: .travis.yml
+	changes[2] = &object.Change{From: object.ChangeEntry{
+		Name: ".travis.yml",
+		Tree: treeTo,
+		TreeEntry: object.TreeEntry{
+			Name: ".travis.yml",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("291286b4ac41952cbd1389fda66420ec03c1a9fe"),
+		},
+	}, To: object.ChangeEntry{},
+	}
+	deps[items.DependencyTreeChanges] = changes
+	fd := fixtures.FileDiff()
+	result, err := fd.Consume(deps)
+	if err != nil {
+		t.Fatalf("computing the file diffs: %v", err)
+	}
+	deps[items.DependencyFileDiff] = result[items.DependencyFileDiff]
+	commit, err := test.Repository.CommitObject(plumbing.NewHash(
+		"cce947b98a050c6d356bc6ba95030254914027b1"))
+	if err != nil {
+		t.Fatalf("loading the commit: %v", err)
+	}
+	deps[core.DependencyCommit] = commit
+	deps[core.DependencyIsMerge] = false
+	lsc := &items.LinesStatsCalculator{}
+	result, err = lsc.Consume(deps)
+	if err != nil {
+		// The type assertion below would panic on a nil result, so stop here.
+		t.Fatalf("consuming the commit: %v", err)
+	}
+	stats := result[items.DependencyLineStats].(map[object.ChangeEntry]items.LineStats)
+	assert.Len(t, stats, 3)
+	nameMap := map[string]items.LineStats{}
+	for ch, val := range stats {
+		nameMap[ch.Name] = val
+	}
+	// testify expects (expected, actual); the order matters for the failure messages.
+	assert.Equal(t, items.LineStats{
+		Added:   628,
+		Removed: 9,
+		Changed: 67,
+	}, nameMap["analyser2.go"])
+	assert.Equal(t, items.LineStats{
+		Added:   0,
+		Removed: 12,
+		Changed: 0,
+	}, nameMap[".travis.yml"])
+	assert.Equal(t, items.LineStats{
+		Added:   207,
+		Removed: 0,
+		Changed: 0,
+	}, nameMap["cmd/hercules/main.go"])
+}