Browse Source

Extract diff from burndown.go

Vadim Markovtsev 7 years ago
parent
commit
4e0cdc1abd
7 changed files with 291 additions and 42 deletions
  1. 17 38
      burndown.go
  2. 7 3
      burndown_test.go
  3. 1 0
      cmd/hercules/main.go
  4. 95 0
      diff.go
  5. 169 0
      diff_test.go
  6. 1 0
      doc.go
  7. 1 1
      pipeline_test.go

+ 17 - 38
burndown.go

@@ -2,7 +2,6 @@ package hercules
 
 
 import (
 import (
 	"bufio"
 	"bufio"
-	"bytes"
 	"errors"
 	"errors"
 	"fmt"
 	"fmt"
 	"io"
 	"io"
@@ -75,7 +74,7 @@ func (analyser *BurndownAnalysis) Provides() []string {
 }
 }
 
 
 func (analyser *BurndownAnalysis) Requires() []string {
 func (analyser *BurndownAnalysis) Requires() []string {
-	arr := [...]string{"renamed_changes", "blob_cache", "day", "author"}
+	arr := [...]string{"file_diff", "renamed_changes", "blob_cache", "day", "author"}
 	return arr[:]
 	return arr[:]
 }
 }
 
 
@@ -106,8 +105,9 @@ func (analyser *BurndownAnalysis) Consume(deps map[string]interface{}) (map[stri
 		analyser.updateHistories(gs, fss, pss, delta)
 		analyser.updateHistories(gs, fss, pss, delta)
 	}
 	}
 	cache := deps["blob_cache"].(map[plumbing.Hash]*object.Blob)
 	cache := deps["blob_cache"].(map[plumbing.Hash]*object.Blob)
-	tree_diff := deps["renamed_changes"].(object.Changes)
-	for _, change := range tree_diff {
+	treeDiffs := deps["renamed_changes"].(object.Changes)
+	fileDiffs := deps["file_diff"].(map[string]FileDiffData)
+	for _, change := range treeDiffs {
 		action, err := change.Action()
 		action, err := change.Action()
 		if err != nil {
 		if err != nil {
 			return nil, err
 			return nil, err
@@ -118,7 +118,7 @@ func (analyser *BurndownAnalysis) Consume(deps map[string]interface{}) (map[stri
 		case merkletrie.Delete:
 		case merkletrie.Delete:
 			err = analyser.handleDeletion(change, author, cache)
 			err = analyser.handleDeletion(change, author, cache)
 		case merkletrie.Modify:
 		case merkletrie.Modify:
-			err = analyser.handleModification(change, author, cache)
+			err = analyser.handleModification(change, author, cache, fileDiffs)
 		}
 		}
 		if err != nil {
 		if err != nil {
 			return nil, err
 			return nil, err
@@ -197,17 +197,6 @@ func countLines(file *object.Blob) (int, error) {
 	return counter, nil
 	return counter, nil
 }
 }
 
 
-func blobToString(file *object.Blob) (string, error) {
-	reader, err := file.Reader()
-	if err != nil {
-		return "", err
-	}
-	defer checkClose(reader)
-	buf := new(bytes.Buffer)
-	buf.ReadFrom(reader)
-	return buf.String(), nil
-}
-
 func (analyser *BurndownAnalysis) packPersonWithDay(person int, day int) int {
 func (analyser *BurndownAnalysis) packPersonWithDay(person int, day int) int {
 	if analyser.PeopleNumber == 0 {
 	if analyser.PeopleNumber == 0 {
 		return day
 		return day
@@ -325,40 +314,30 @@ func (analyser *BurndownAnalysis) handleDeletion(
 }
 }
 
 
 func (analyser *BurndownAnalysis) handleModification(
 func (analyser *BurndownAnalysis) handleModification(
-	change *object.Change, author int, cache map[plumbing.Hash]*object.Blob) error {
+	change *object.Change, author int, cache map[plumbing.Hash]*object.Blob,
+  diffs map[string]FileDiffData) error {
 
 
-	blob_from := cache[change.From.TreeEntry.Hash]
-	blob_to := cache[change.To.TreeEntry.Hash]
-	// we are not validating UTF-8 here because for example
-	// git/git 4f7770c87ce3c302e1639a7737a6d2531fe4b160 fetch-pack.c is invalid UTF-8
-	str_from, err := blobToString(blob_from)
-	if err != nil {
-		return err
-	}
-	str_to, err := blobToString(blob_to)
-	if err != nil {
-		return err
-	}
 	file, exists := analyser.files[change.From.Name]
 	file, exists := analyser.files[change.From.Name]
 	if !exists {
 	if !exists {
 		return analyser.handleInsertion(change, author, cache)
 		return analyser.handleInsertion(change, author, cache)
 	}
 	}
+
 	// possible rename
 	// possible rename
 	if change.To.Name != change.From.Name {
 	if change.To.Name != change.From.Name {
-		err = analyser.handleRename(change.From.Name, change.To.Name)
+		err := analyser.handleRename(change.From.Name, change.To.Name)
 		if err != nil {
 		if err != nil {
 			return err
 			return err
 		}
 		}
 	}
 	}
-	dmp := diffmatchpatch.New()
-	src, dst, _ := dmp.DiffLinesToRunes(str_from, str_to)
-	if file.Len() != len(src) {
+
+	thisDiffs := diffs[change.To.Name]
+	if file.Len() != thisDiffs.OldLinesOfCode {
 		fmt.Fprintf(os.Stderr, "====TREE====\n%s", file.Dump())
 		fmt.Fprintf(os.Stderr, "====TREE====\n%s", file.Dump())
 		return errors.New(fmt.Sprintf("%s: internal integrity error src %d != %d %s -> %s",
 		return errors.New(fmt.Sprintf("%s: internal integrity error src %d != %d %s -> %s",
-			change.To.Name, len(src), file.Len(),
+			change.To.Name, thisDiffs.OldLinesOfCode, file.Len(),
 			change.From.TreeEntry.Hash.String(), change.To.TreeEntry.Hash.String()))
 			change.From.TreeEntry.Hash.String(), change.To.TreeEntry.Hash.String()))
 	}
 	}
-	diffs := dmp.DiffMainRunes(src, dst, false)
+
 	// we do not call RunesToDiffLines so the number of lines equals
 	// we do not call RunesToDiffLines so the number of lines equals
 	// to the rune count
 	// to the rune count
 	position := 0
 	position := 0
@@ -377,7 +356,7 @@ func (analyser *BurndownAnalysis) handleModification(
 		}
 		}
 	}
 	}
 
 
-	for _, edit := range diffs {
+	for _, edit := range thisDiffs.Diffs {
 		dump_before := ""
 		dump_before := ""
 		if analyser.Debug {
 		if analyser.Debug {
 			dump_before = file.Dump()
 			dump_before = file.Dump()
@@ -430,9 +409,9 @@ func (analyser *BurndownAnalysis) handleModification(
 		apply(pending)
 		apply(pending)
 		pending.Text = ""
 		pending.Text = ""
 	}
 	}
-	if file.Len() != len(dst) {
+	if file.Len() != thisDiffs.NewLinesOfCode {
 		return errors.New(fmt.Sprintf("%s: internal integrity error dst %d != %d",
 		return errors.New(fmt.Sprintf("%s: internal integrity error dst %d != %d",
-			change.To.Name, len(dst), file.Len()))
+			change.To.Name, thisDiffs.NewLinesOfCode, file.Len()))
 	}
 	}
 	return nil
 	return nil
 }
 }

+ 7 - 3
burndown_test.go

@@ -12,7 +12,7 @@ func TestBurndownMeta(t *testing.T) {
 	burndown := BurndownAnalysis{}
 	burndown := BurndownAnalysis{}
 	assert.Equal(t, burndown.Name(), "Burndown")
 	assert.Equal(t, burndown.Name(), "Burndown")
 	assert.Equal(t, len(burndown.Provides()), 0)
 	assert.Equal(t, len(burndown.Provides()), 0)
-	required := [...]string{"renamed_changes", "blob_cache", "day", "author"}
+	required := [...]string{"file_diff", "renamed_changes", "blob_cache", "day", "author"}
 	for _, name := range required {
 	for _, name := range required {
 		assert.Contains(t, burndown.Requires(), name)
 		assert.Contains(t, burndown.Requires(), name)
 	}
 	}
@@ -40,7 +40,7 @@ func TestBurndownConsume(t *testing.T) {
 	treeFrom, _ := testRepository.TreeObject(plumbing.NewHash(
 	treeFrom, _ := testRepository.TreeObject(plumbing.NewHash(
 		"a1eb2ea76eb7f9bfbde9b243861474421000eb96"))
 		"a1eb2ea76eb7f9bfbde9b243861474421000eb96"))
 	treeTo, _ := testRepository.TreeObject(plumbing.NewHash(
 	treeTo, _ := testRepository.TreeObject(plumbing.NewHash(
-		"4d3f9500c2b9dc10925ad1705926b67f0f9101ca"))
+		"994eac1cd07235bb9815e547a75c84265dea00f5"))
 	changes[0] = &object.Change{From: object.ChangeEntry{
 	changes[0] = &object.Change{From: object.ChangeEntry{
 		Name: "analyser.go",
 		Name: "analyser.go",
 		Tree: treeFrom,
 		Tree: treeFrom,
@@ -69,7 +69,11 @@ func TestBurndownConsume(t *testing.T) {
 	},
 	},
 	}
 	}
 	deps["renamed_changes"] = changes
 	deps["renamed_changes"] = changes
-	result, err := burndown.Consume(deps)
+	fd := fixtureFileDiff()
+	result, err := fd.Consume(deps)
+	assert.Nil(t, err)
+	deps["file_diff"] = result["file_diff"]
+	result, err = burndown.Consume(deps)
 	assert.Nil(t, result)
 	assert.Nil(t, result)
 	assert.Nil(t, err)
 	assert.Nil(t, err)
 	assert.Equal(t, burndown.previousDay, 0)
 	assert.Equal(t, burndown.previousDay, 0)

+ 1 - 0
cmd/hercules/main.go

@@ -153,6 +153,7 @@ func main() {
 	pipeline.AddItem(&hercules.DaysSinceStart{})
 	pipeline.AddItem(&hercules.DaysSinceStart{})
 	pipeline.AddItem(&hercules.RenameAnalysis{SimilarityThreshold: similarity_threshold})
 	pipeline.AddItem(&hercules.RenameAnalysis{SimilarityThreshold: similarity_threshold})
 	pipeline.AddItem(&hercules.TreeDiff{})
 	pipeline.AddItem(&hercules.TreeDiff{})
+	pipeline.AddItem(&hercules.FileDiff{})
 	id_matcher := &hercules.IdentityDetector{}
 	id_matcher := &hercules.IdentityDetector{}
 	var peopleCount int
 	var peopleCount int
 	if withPeople || withCouples {
 	if withPeople || withCouples {

+ 95 - 0
diff.go

@@ -0,0 +1,95 @@
+package hercules
+
+import (
+	"bytes"
+	"errors"
+
+	"github.com/sergi/go-diff/diffmatchpatch"
+	"gopkg.in/src-d/go-git.v4"
+	"gopkg.in/src-d/go-git.v4/plumbing/object"
+	"gopkg.in/src-d/go-git.v4/plumbing"
+	"gopkg.in/src-d/go-git.v4/utils/merkletrie"
+)
+
+// FileDiff calculates the difference of files which were modified.
+type FileDiff struct {
+}
+
+// FileDiffData is the per-file result stored by FileDiff.Consume.
+type FileDiffData struct {
+	// OldLinesOfCode is the number of lines in the file before the change.
+	OldLinesOfCode int
+	// NewLinesOfCode is the number of lines in the file after the change.
+	NewLinesOfCode int
+	// Diffs is the edit script returned by diffmatchpatch. It is computed on
+	// the output of DiffLinesToRunes and never converted back, so each rune
+	// of an edit's Text stands for one whole line.
+	Diffs []diffmatchpatch.Diff
+}
+
+// Name returns the identifier of this pipeline item, "FileDiff".
+func (diff *FileDiff) Name() string {
+	return "FileDiff"
+}
+
+// Provides lists the dependency keys this item publishes from Consume:
+// only "file_diff".
+func (diff *FileDiff) Provides() []string {
+	return []string{"file_diff"}
+}
+
+// Requires lists the dependency keys Consume reads from its input map:
+// "renamed_changes" and "blob_cache", in that order.
+func (diff *FileDiff) Requires() []string {
+	return []string{"renamed_changes", "blob_cache"}
+}
+
+// Initialize is a no-op: FileDiff has no state to set up, so the repository
+// argument is ignored.
+func (diff *FileDiff) Initialize(repository *git.Repository) {
+	// Intentionally empty.
+}
+
+// Consume computes a line-level diff for every modified file of a commit.
+//
+// It reads "blob_cache" (map[plumbing.Hash]*object.Blob) and
+// "renamed_changes" (object.Changes) from deps; the unchecked type assertions
+// panic if either key is absent or holds a different type. Changes whose
+// action is not merkletrie.Modify are skipped.
+//
+// On success the returned map has the single key "file_diff", holding a
+// map[string]FileDiffData indexed by the file's new name. An error from
+// change.Action or from reading either blob aborts the call and a nil map is
+// returned together with that error.
+func (diff *FileDiff) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
+	blobs := deps["blob_cache"].(map[plumbing.Hash]*object.Blob)
+	changes := deps["renamed_changes"].(object.Changes)
+	fileDiffs := make(map[string]FileDiffData)
+	for _, change := range changes {
+		action, err := change.Action()
+		if err != nil {
+			return nil, err
+		}
+		if action != merkletrie.Modify {
+			continue
+		}
+		// we are not validating UTF-8 here because for example
+		// git/git 4f7770c87ce3c302e1639a7737a6d2531fe4b160 fetch-pack.c is invalid UTF-8
+		oldText, err := blobToString(blobs[change.From.TreeEntry.Hash])
+		if err != nil {
+			return nil, err
+		}
+		newText, err := blobToString(blobs[change.To.TreeEntry.Hash])
+		if err != nil {
+			return nil, err
+		}
+		// Each distinct line is mapped to one rune, so the rune slices have
+		// as many elements as the files have lines.
+		dmp := diffmatchpatch.New()
+		oldLines, newLines, _ := dmp.DiffLinesToRunes(oldText, newText)
+		edits := dmp.DiffMainRunes(oldLines, newLines, false)
+		fileDiffs[change.To.Name] = FileDiffData{
+			OldLinesOfCode: len(oldLines),
+			NewLinesOfCode: len(newLines),
+			Diffs:          edits,
+		}
+	}
+	return map[string]interface{}{"file_diff": fileDiffs}, nil
+}
+
+// Finalize always returns nil; FileDiff hands its data over through Consume
+// and has nothing to report at the end.
+func (diff *FileDiff) Finalize() interface{} {
+	return nil
+}
+
+// blobToString reads the entire contents of a git blob into a string.
+//
+// The bytes are returned as-is, without UTF-8 validation. A nil blob (for
+// example a hash that is missing from the blob cache) yields an error, as
+// does a failure to open the blob or to read it to the end.
+func blobToString(file *object.Blob) (string, error) {
+	if file == nil {
+		return "", errors.New("Blob not cached.")
+	}
+	reader, err := file.Reader()
+	if err != nil {
+		return "", err
+	}
+	defer checkClose(reader)
+	buf := new(bytes.Buffer)
+	// Do not pass a partially read blob off as the full file: callers count
+	// its lines and diff it, so a silent short read would corrupt both.
+	if _, err := buf.ReadFrom(reader); err != nil {
+		return "", err
+	}
+	return buf.String(), nil
+}

+ 169 - 0
diff_test.go

@@ -0,0 +1,169 @@
+package hercules
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"gopkg.in/src-d/go-git.v4/plumbing"
+	"gopkg.in/src-d/go-git.v4/plumbing/object"
+	"github.com/sergi/go-diff/diffmatchpatch"
+	"unicode/utf8"
+)
+
+// fixtureFileDiff returns a fresh FileDiff that has been initialized with the
+// shared test repository.
+func fixtureFileDiff() *FileDiff {
+	item := new(FileDiff)
+	item.Initialize(testRepository)
+	return item
+}
+
+// TestFileDiffMeta pins the item's name and the exact keys it provides and
+// requires.
+func TestFileDiffMeta(t *testing.T) {
+	item := fixtureFileDiff()
+	assert.Equal(t, item.Name(), "FileDiff")
+	provided := item.Provides()
+	assert.Equal(t, len(provided), 1)
+	assert.Equal(t, provided[0], "file_diff")
+	required := item.Requires()
+	assert.Equal(t, len(required), 2)
+	assert.Equal(t, required[0], "renamed_changes")
+	assert.Equal(t, required[1], "blob_cache")
+}
+
+// TestFileDiffFinalize checks that Finalize yields nil.
+func TestFileDiffFinalize(t *testing.T) {
+	assert.Nil(t, fixtureFileDiff().Finalize())
+}
+
+// TestFileDiffConsume feeds one modification, one insertion and one deletion
+// to FileDiff and checks that only the modification is reported, with the
+// expected line counts and numbers of inserted and deleted lines.
+func TestFileDiffConsume(t *testing.T) {
+	fd := fixtureFileDiff()
+	cache := map[plumbing.Hash]*object.Blob{}
+	for _, id := range []string{
+		"291286b4ac41952cbd1389fda66420ec03c1a9fe",
+		"334cde09da4afcb74f8d2b3e6fd6cce61228b485",
+		"dc248ba2b22048cc730c571a748e8ffcf7085ab9",
+	} {
+		hash := plumbing.NewHash(id)
+		cache[hash], _ = testRepository.BlobObject(hash)
+	}
+	treeFrom, _ := testRepository.TreeObject(plumbing.NewHash(
+		"a1eb2ea76eb7f9bfbde9b243861474421000eb96"))
+	treeTo, _ := testRepository.TreeObject(plumbing.NewHash(
+		"994eac1cd07235bb9815e547a75c84265dea00f5"))
+	// entry builds one side of a change for a regular file.
+	entry := func(name string, tree *object.Tree, id string) object.ChangeEntry {
+		return object.ChangeEntry{
+			Name: name,
+			Tree: tree,
+			TreeEntry: object.TreeEntry{
+				Name: name,
+				Mode: 0100644,
+				Hash: plumbing.NewHash(id),
+			},
+		}
+	}
+	changes := object.Changes{
+		// modification
+		&object.Change{
+			From: entry("analyser.go", treeFrom, "dc248ba2b22048cc730c571a748e8ffcf7085ab9"),
+			To:   entry("analyser.go", treeTo, "334cde09da4afcb74f8d2b3e6fd6cce61228b485"),
+		},
+		// insertion: empty From side
+		&object.Change{
+			From: object.ChangeEntry{},
+			To:   entry(".travis.yml", treeTo, "291286b4ac41952cbd1389fda66420ec03c1a9fe"),
+		},
+		// deletion: empty To side
+		&object.Change{
+			From: entry("rbtree.go", treeFrom, "14c3fa5a1cca103032f10379467a3a2f210e5f94"),
+			To:   object.ChangeEntry{},
+		},
+	}
+	deps := map[string]interface{}{
+		"blob_cache":      cache,
+		"renamed_changes": changes,
+	}
+	res, err := fd.Consume(deps)
+	assert.Nil(t, err)
+	diffs := res["file_diff"].(map[string]FileDiffData)
+	assert.Equal(t, len(diffs), 1)
+	got := diffs["analyser.go"]
+	assert.Equal(t, got.OldLinesOfCode, 307)
+	assert.Equal(t, got.NewLinesOfCode, 309)
+	// One rune of an edit's Text stands for one line, so rune counts are
+	// line counts.
+	deletions, insertions := 0, 0
+	for _, edit := range got.Diffs {
+		if edit.Type == diffmatchpatch.DiffInsert {
+			insertions += utf8.RuneCountInString(edit.Text)
+		} else if edit.Type == diffmatchpatch.DiffDelete {
+			deletions += utf8.RuneCountInString(edit.Text)
+		}
+	}
+	assert.Equal(t, deletions, 13)
+	assert.Equal(t, insertions, 15)
+}
+
+func TestFileDiffConsumeInvalidBlob(t *testing.T) {
+  fd := fixtureFileDiff()
+	deps := map[string]interface{}{}
+	cache := map[plumbing.Hash]*object.Blob{}
+	hash := plumbing.NewHash("291286b4ac41952cbd1389fda66420ec03c1a9fe")
+	cache[hash], _ = testRepository.BlobObject(hash)
+	hash = plumbing.NewHash("334cde09da4afcb74f8d2b3e6fd6cce61228b485")
+	cache[hash], _ = testRepository.BlobObject(hash)
+	hash = plumbing.NewHash("dc248ba2b22048cc730c571a748e8ffcf7085ab9")
+	cache[hash], _ = testRepository.BlobObject(hash)
+	deps["blob_cache"] = cache
+	changes := make(object.Changes, 1)
+	treeFrom, _ := testRepository.TreeObject(plumbing.NewHash(
+		"a1eb2ea76eb7f9bfbde9b243861474421000eb96"))
+	treeTo, _ := testRepository.TreeObject(plumbing.NewHash(
+		"994eac1cd07235bb9815e547a75c84265dea00f5"))
+	changes[0] = &object.Change{From: object.ChangeEntry{
+		Name: "analyser.go",
+		Tree: treeFrom,
+		TreeEntry: object.TreeEntry{
+			Name: "analyser.go",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("ffffffffffffffffffffffffffffffffffffffff"),
+		},
+	}, To: object.ChangeEntry{
+		Name: "analyser.go",
+		Tree: treeTo,
+		TreeEntry: object.TreeEntry{
+			Name: "analyser.go",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("334cde09da4afcb74f8d2b3e6fd6cce61228b485"),
+		},
+	}}
+	deps["renamed_changes"] = changes
+	res, err := fd.Consume(deps)
+	assert.Nil(t, res)
+	assert.NotNil(t, err)
+	changes[0] = &object.Change{From: object.ChangeEntry{
+		Name: "analyser.go",
+		Tree: treeFrom,
+		TreeEntry: object.TreeEntry{
+			Name: "analyser.go",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("dc248ba2b22048cc730c571a748e8ffcf7085ab9"),
+		},
+	}, To: object.ChangeEntry{
+		Name: "analyser.go",
+		Tree: treeTo,
+		TreeEntry: object.TreeEntry{
+			Name: "analyser.go",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("ffffffffffffffffffffffffffffffffffffffff"),
+		},
+	}}
+	res, err = fd.Consume(deps)
+	assert.Nil(t, res)
+	assert.NotNil(t, err)
+}

+ 1 - 0
doc.go

@@ -24,6 +24,7 @@ Then add the required analysis tree nodes:
   pipeline.AddItem(&hercules.BlobCache{})
   pipeline.AddItem(&hercules.BlobCache{})
 	pipeline.AddItem(&hercules.DaysSinceStart{})
 	pipeline.AddItem(&hercules.DaysSinceStart{})
 	pipeline.AddItem(&hercules.TreeDiff{})
 	pipeline.AddItem(&hercules.TreeDiff{})
+	pipeline.AddItem(&hercules.FileDiff{})
 	pipeline.AddItem(&hercules.RenameAnalysis{SimilarityThreshold: 80})
 	pipeline.AddItem(&hercules.RenameAnalysis{SimilarityThreshold: 80})
 	pipeline.AddItem(&hercules.IdentityDetector{})
 	pipeline.AddItem(&hercules.IdentityDetector{})
 
 

+ 1 - 1
pipeline_test.go

@@ -224,7 +224,7 @@ func init() {
 		testRepository, err = git.PlainOpen(cwd)
 		testRepository, err = git.PlainOpen(cwd)
 		if err == nil {
 		if err == nil {
 			iter, _ := testRepository.CommitObjects()
 			iter, _ := testRepository.CommitObjects()
-			commits := 0
+			commits := -1
 			for ; err != io.EOF; _, err = iter.Next() {
 			for ; err != io.EOF; _, err = iter.Next() {
 				if err != nil {
 				if err != nil {
 					panic(err)
 					panic(err)