浏览代码

Merge pull request #83 from vmarkovtsev/master

Optimize file merges using git hashes
Vadim Markovtsev 6 年之前
父节点
当前提交
431f0f5142
共有 3 个文件被更改,包括 75 次插入和 19 次删除
  1. 23 6
      internal/burndown/file.go
  2. 45 9
      internal/burndown/file_test.go
  3. 7 4
      leaves/burndown.go

+ 23 - 6
internal/burndown/file.go

@@ -3,6 +3,7 @@ package burndown
 import (
 	"fmt"
 
+	"gopkg.in/src-d/go-git.v4/plumbing"
 	"gopkg.in/src-d/hercules.v4/internal"
 	"gopkg.in/src-d/hercules.v4/internal/rbtree"
 )
@@ -25,6 +26,9 @@ type Status struct {
 //
 // Dump() writes the tree to a string and Validate() checks the tree integrity.
 type File struct {
+	// Git hash of the contents.
+	Hash     plumbing.Hash
+
 	tree     *rbtree.RBTree
 	statuses []Status
 }
@@ -61,8 +65,8 @@ func (file *File) updateTime(currentTime int, previousTime int, delta int) {
 // last node);
 //
 // statuses are the attached interval length mappings.
-func NewFile(time int, length int, statuses ...Status) *File {
-	file := &File{tree: new(rbtree.RBTree), statuses: statuses}
+func NewFile(hash plumbing.Hash, time int, length int, statuses ...Status) *File {
+	file := &File{Hash: hash, tree: new(rbtree.RBTree), statuses: statuses}
 	if length > 0 {
 		file.updateTime(time, time, length)
 		file.tree.Insert(rbtree.Item{Key: 0, Value: time})
@@ -79,8 +83,8 @@ func NewFile(time int, length int, statuses ...Status) *File {
 // vals is a slice with the starting tree values. Must match the size of keys.
 //
 // statuses are the attached interval length mappings.
-func NewFileFromTree(keys []int, vals []int, statuses ...Status) *File {
-	file := &File{tree: new(rbtree.RBTree), statuses: statuses}
+func NewFileFromTree(hash plumbing.Hash, keys []int, vals []int, statuses ...Status) *File {
+	file := &File{Hash: hash, tree: new(rbtree.RBTree), statuses: statuses}
 	if len(keys) != len(vals) {
 		panic("keys and vals must be of equal length")
 	}
@@ -95,7 +99,7 @@ func NewFileFromTree(keys []int, vals []int, statuses ...Status) *File {
 // depending on `clearStatuses` the original statuses are removed or not.
 // Any new `statuses` are appended.
 func (file *File) Clone(clearStatuses bool, statuses ...Status) *File {
-	clone := &File{tree: file.tree.Clone(), statuses: file.statuses}
+	clone := &File{Hash: file.Hash, tree: file.tree.Clone(), statuses: file.statuses}
 	if clearStatuses {
 		clone.statuses = []Status{}
 	}
@@ -237,7 +241,19 @@ func (file *File) Update(time int, pos int, insLength int, delLength int) {
 	}
 }
 
-func (file *File) Merge(day int, others... *File) {
+// Merge combines several prepared File-s together. Returns the value
+// indicating whether at least one File required merging.
+func (file *File) Merge(day int, others... *File) bool {
+	dirty := false
+	for _, other := range others {
+		if file.Hash != other.Hash {
+			dirty = true
+			break
+		}
+	}
+	if !dirty {
+		return false
+	}
 	myself := file.flatten()
 	for _, other := range others {
 		lines := other.flatten()
@@ -280,6 +296,7 @@ func (file *File) Merge(day int, others... *File) {
 	}
 	tree.Insert(rbtree.Item{Key: len(myself), Value: TreeEnd})
 	file.tree = tree
+	return true
 }
 
 // Status returns the bound status object by the specified index.

+ 45 - 9
internal/burndown/file_test.go

@@ -6,6 +6,7 @@ import (
 	"github.com/stretchr/testify/assert"
 	"gopkg.in/src-d/hercules.v4/internal/rbtree"
 	"fmt"
+	"gopkg.in/src-d/go-git.v4/plumbing"
 )
 
 func updateStatusFile(
@@ -15,7 +16,7 @@ func updateStatusFile(
 
 func fixtureFile() (*File, map[int]int64) {
 	status := map[int]int64{}
-	file := NewFile(0, 100, NewStatus(status, updateStatusFile))
+	file := NewFile(plumbing.ZeroHash, 0, 100, NewStatus(status, updateStatusFile))
 	return file, status
 }
 
@@ -167,7 +168,7 @@ func TestInsertFile(t *testing.T) {
 
 func TestZeroInitializeFile(t *testing.T) {
 	status := map[int]int64{}
-	file := NewFile(0, 0, NewStatus(status, updateStatusFile))
+	file := NewFile(plumbing.ZeroHash, 0, 0, NewStatus(status, updateStatusFile))
 	assert.NotContains(t, status, 0)
 	dump := file.Dump()
 	// Output:
@@ -418,7 +419,7 @@ func TestBug3File(t *testing.T) {
 
 func TestBug4File(t *testing.T) {
 	status := map[int]int64{}
-	file := NewFile(0, 10, NewStatus(status, updateStatusFile))
+	file := NewFile(plumbing.ZeroHash, 0, 10, NewStatus(status, updateStatusFile))
 	file.Update(125, 0, 20, 9)
 	file.Update(125, 0, 20, 20)
 	file.Update(166, 12, 1, 1)
@@ -448,14 +449,14 @@ func TestBug5File(t *testing.T) {
 	status := map[int]int64{}
 	keys := []int{0, 2, 4, 7, 10}
 	vals := []int{24, 28, 24, 28, -1}
-	file := NewFileFromTree(keys, vals, NewStatus(status, updateStatusFile))
+	file := NewFileFromTree(plumbing.ZeroHash, keys, vals, NewStatus(status, updateStatusFile))
 	file.Update(28, 0, 1, 3)
 	dump := file.Dump()
 	assert.Equal(t, "0 28\n2 24\n5 28\n8 -1\n", dump)
 
 	keys = []int{0, 1, 16, 18}
 	vals = []int{305, 0, 157, -1}
-	file = NewFileFromTree(keys, vals, NewStatus(status, updateStatusFile))
+	file = NewFileFromTree(plumbing.ZeroHash, keys, vals, NewStatus(status, updateStatusFile))
 	file.Update(310, 0, 0, 2)
 	dump = file.Dump()
 	assert.Equal(t, "0 0\n14 157\n16 -1\n", dump)
@@ -464,13 +465,13 @@ func TestBug5File(t *testing.T) {
 func TestNewFileFromTreeInvalidSize(t *testing.T) {
 	keys := [...]int{1, 2, 3}
 	vals := [...]int{4, 5}
-	assert.Panics(t, func() { NewFileFromTree(keys[:], vals[:]) })
+	assert.Panics(t, func() { NewFileFromTree(plumbing.ZeroHash, keys[:], vals[:]) })
 }
 
 func TestUpdatePanic(t *testing.T) {
 	keys := [...]int{0}
 	vals := [...]int{-1}
-	file := NewFileFromTree(keys[:], vals[:])
+	file := NewFileFromTree(plumbing.ZeroHash, keys[:], vals[:])
 	file.tree.DeleteWithKey(0)
 	file.tree.Insert(rbtree.Item{Key: -1, Value: -1})
 	var paniced interface{}
@@ -492,7 +493,7 @@ func TestFileStatus(t *testing.T) {
 func TestFileValidate(t *testing.T) {
 	keys := [...]int{0}
 	vals := [...]int{-1}
-	file := NewFileFromTree(keys[:], vals[:])
+	file := NewFileFromTree(plumbing.ZeroHash, keys[:], vals[:])
 	file.tree.DeleteWithKey(0)
 	file.tree.Insert(rbtree.Item{Key: -1, Value: -1})
 	assert.Panics(t, func() { file.Validate() })
@@ -566,6 +567,7 @@ func TestFileMergeMark(t *testing.T) {
 
 func TestFileMerge(t *testing.T) {
 	file1, status := fixtureFile()
+	file1.Hash = plumbing.NewHash("0b7101095af6f90a3a2f3941fdf82563a83ce4db")
 	// 0 0 | 100 -1                             [0]: 100
 	file1.Update(1, 20, 30, 0)
 	// 0 0 | 20 1 | 50 0 | 130 -1               [0]: 100, [1]: 30
@@ -576,6 +578,7 @@ func TestFileMerge(t *testing.T) {
 	file1.Update(4, 20, 10, 0)
 	// 0 0 | 20 4 | 30 1 | 50 0 | 130 -1        [0]: 100, [1]: 20, [4]: 10
 	file2 := file1.Clone(false)
+	assert.Equal(t, file1.Hash, file2.Hash)
 	file1.Update(TreeMergeMark, 60, 30, 30)
 	// 0 0 | 20 4 | 30 1 | 50 0 | 60 M | 90 0 | 130 -1
 	// [0]: 70, [1]: 20, [4]: 10
@@ -588,7 +591,9 @@ func TestFileMerge(t *testing.T) {
 	file2.Update(6, 0, 10, 10)
 	// 0 6 | 10 0 | 20 4 | 30 1 | 50 0 | 60 5 | 80 M | 90 0 | 130 -1
 	// [0]: 60, [1]: 20, [4]: 10, [5]: 20, [6]: 10
-	file1.Merge(7, file2)
+	file2.Hash = plumbing.ZeroHash
+	dirty := file1.Merge(7, file2)
+	assert.True(t, dirty)
 	// 0 0 | 20 4 | 30 1 | 50 0 | 60 5 | 80 7 | 90 0 | 130 -1
 	// [0]: 70, [1]: 20, [4]: 10, [5]: 20, [6]: 0, [7]: 10
 	dump := file1.Dump()
@@ -601,4 +606,35 @@ func TestFileMerge(t *testing.T) {
 	assert.Equal(t, int64(20), status[5])
 	assert.Equal(t, int64(0), status[6])
 	assert.Equal(t, int64(10), status[7])
+}
+
+func TestFileMergeNoop(t *testing.T) {
+	file1, status := fixtureFile()
+	// 0 0 | 100 -1                             [0]: 100
+	file1.Update(1, 20, 30, 0)
+	// 0 0 | 20 1 | 50 0 | 130 -1               [0]: 100, [1]: 30
+	file1.Update(2, 20, 0, 5)
+	// 0 0 | 20 1 | 45 0 | 125 -1               [0]: 100, [1]: 25
+	file1.Update(3, 20, 0, 5)
+	// 0 0 | 20 1 | 40 0 | 120 -1               [0]: 100, [1]: 20
+	file1.Update(4, 20, 10, 0)
+	// 0 0 | 20 4 | 30 1 | 50 0 | 130 -1        [0]: 100, [1]: 20, [4]: 10
+	file2 := file1.Clone(false)
+	dirty := file1.Merge(7, file2)
+	assert.False(t, dirty)
+	dump1 := file1.Dump()
+	dump2 := file2.Dump()
+	assert.Equal(t, dump1, dump2)
+	assert.Equal(t, dump1, "0 0\n20 4\n30 1\n50 0\n130 -1\n")
+	assert.Equal(t, int64(100), status[0])
+	assert.Equal(t, int64(20), status[1])
+	assert.Equal(t, int64(0), status[2])
+	assert.Equal(t, int64(0), status[3])
+	assert.Equal(t, int64(10), status[4])
+	file1.Update(TreeMergeMark, 60, 30, 30)
+	// 0 0 | 20 4 | 30 1 | 50 0 | 60 M | 90 0 | 130 -1
+	// [0]: 70, [1]: 20, [4]: 10
+	dirty = file1.Merge(7, file2)
+	// because the hashes are still the same
+	assert.False(t, dirty)
 }

+ 7 - 4
leaves/burndown.go

@@ -860,8 +860,8 @@ func (analyser *BurndownAnalysis) updateMatrix(
 }
 
 func (analyser *BurndownAnalysis) newFile(
-	author int, day int, size int, global map[int]int64, people []map[int]int64,
-	matrix []map[int]int64) *burndown.File {
+	hash plumbing.Hash, author int, day int, size int, global map[int]int64,
+	people []map[int]int64, matrix []map[int]int64) *burndown.File {
 	statuses := make([]burndown.Status, 1)
 	statuses[0] = burndown.NewStatus(global, analyser.updateStatus)
 	if analyser.TrackFiles {
@@ -872,7 +872,7 @@ func (analyser *BurndownAnalysis) newFile(
 		statuses = append(statuses, burndown.NewStatus(matrix, analyser.updateMatrix))
 		day = analyser.packPersonWithDay(author, day)
 	}
-	return burndown.NewFile(day, size, statuses...)
+	return burndown.NewFile(hash, day, size, statuses...)
 }
 
 func (analyser *BurndownAnalysis) handleInsertion(
@@ -891,7 +891,8 @@ func (analyser *BurndownAnalysis) handleInsertion(
 		return fmt.Errorf("file %s already exists", name)
 	}
 	file = analyser.newFile(
-		author, analyser.day, lines, analyser.globalStatus, analyser.people, analyser.matrix)
+		blob.Hash, author, analyser.day, lines,
+		analyser.globalStatus, analyser.people, analyser.matrix)
 	analyser.files[name] = file
 	return nil
 }
@@ -910,6 +911,7 @@ func (analyser *BurndownAnalysis) handleDeletion(
 	name := change.From.Name
 	file := analyser.files[name]
 	file.Update(analyser.packPersonWithDay(author, analyser.day), 0, 0, lines)
+	file.Hash = plumbing.ZeroHash
 	delete(analyser.files, name)
 	return nil
 }
@@ -923,6 +925,7 @@ func (analyser *BurndownAnalysis) handleModification(
 		// this indeed may happen
 		return analyser.handleInsertion(change, author, cache)
 	}
+	file.Hash = change.To.TreeEntry.Hash
 
 	// possible rename
 	if change.To.Name != change.From.Name {