Merge pull request #33 from vmarkovtsev/master

Diff business
Vadim Markovtsev 7 years ago
parent
commit
90bb3c9e1b
27 changed files with 9524 additions and 175 deletions
  1. OCTOPUS.md (+36, -0)
  2. blob_cache.go (+5, -4)
  3. blob_cache_test.go (+11, -11)
  4. burndown.go (+7, -6)
  5. burndown_test.go (+36, -36)
  6. contrib/_plugin_example/churn_analysis.go (+11, -6)
  7. couples.go (+3, -3)
  8. couples_test.go (+16, -16)
  9. day.go (+6, -2)
  10. day_test.go (+6, -6)
  11. diff.go (+28, -7)
  12. diff_refiner.go (+116, -10)
  13. diff_refiner_test.go (+98, -0)
  14. diff_test.go (+58, -10)
  15. identity.go (+4, -2)
  16. identity_test.go (+3, -3)
  17. pipeline_test.go (+2, -2)
  18. renames.go (+5, -5)
  19. renames_test.go (+7, -7)
  20. test_data/1.java (+295, -0)
  21. test_data/2.java (+338, -0)
  22. test_data/uast1.pb (+3938, -0)
  23. test_data/uast2.pb (+4445, -0)
  24. tree_diff.go (+6, -2)
  25. tree_diff_test.go (+3, -3)
  26. uast.go (+22, -15)
  27. uast_test.go (+19, -19)

+ 36 - 0
OCTOPUS.md

@@ -0,0 +1,36 @@
+# Forks and merges in commit history
+
+Hercules expects the commit history to be linear.
+It follows the main (zero index) branch when it encounters a fork.
+This behavior ignores all the side branches, and we are currently
+thinking about how to include them in the analysis.
+
+### Plan
+
+* Commits must be sorted by time.
+* When a fork is hit, clone the pipeline. Assign the old instance to the main branch and new
+instances to the sprouts. BurndownAnalysis should share the same counters for efficiency
+and simplicity, but the files must be copied.
+* Follow each branch independently. Clone side pipelines as needed.
+* Join pipelines on merge commits. Side pipelines are killed, the main instance survives.
+This will be tricky for Burndown because we need to join the files together while preserving
+the line annotations.
+* Merge commits should have diffs which correspond to CGit diffs. So far they represent the diff
+against the previous commit in the main branch.
+* The sequence of commits must be the analysis scenario: it must indicate when to fork and when
+to merge, and which pipeline instance to apply.
+
+### New APIs
+
+* PipelineItem
+  * `Fork()`
+  * `Merge()`
+  
+### Major changes
+
+* `Pipeline`
+  * `Commits()`
+  * `Run()`
+* `Burndown`
+* `Couples`
+* `FileDiff`
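
The Fork()/Merge() hooks above are only a plan at this point. As a minimal sketch of how they might plug into a pipeline item — the method names come from the list above, but the interface, signatures and counter-sharing strategy below are assumptions, not the hercules implementation:

```go
package main

import "fmt"

// Hypothetical sketch: only the Fork/Merge names come from OCTOPUS.md;
// everything else here is an assumption for illustration.
type branchingItem interface {
	Name() string
	// Fork clones the item for each side branch sprouting from a fork commit;
	// the receiver keeps following the main (zero index) branch.
	Fork(n int) []branchingItem
	// Merge folds the side-branch clones back into the receiver at the merge
	// commit; the clones are discarded afterwards.
	Merge(branches []branchingItem)
}

// toyCounter shares a single counter between clones (cheap, like the shared
// BurndownAnalysis counters mentioned in the plan) but copies the per-branch
// file list, since files must not be shared.
type toyCounter struct {
	commits *int     // shared between all clones
	files   []string // copied per branch
}

func (c *toyCounter) Name() string { return "ToyCounter" }

func (c *toyCounter) Fork(n int) []branchingItem {
	clones := make([]branchingItem, n)
	for i := range clones {
		clones[i] = &toyCounter{commits: c.commits, files: append([]string{}, c.files...)}
	}
	return clones
}

func (c *toyCounter) Merge(branches []branchingItem) {
	for _, b := range branches {
		c.files = append(c.files, b.(*toyCounter).files...)
	}
}

func main() {
	n := 0
	trunk := &toyCounter{commits: &n}
	sides := trunk.Fork(2) // fork commit: two side branches sprout
	trunk.Merge(sides)     // merge commit: side pipelines are killed
	fmt.Println(trunk.Name(), "merged", len(sides), "side branches")
}
```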

+ 5 - 4
blob_cache.go

@@ -20,6 +20,7 @@ type BlobCache struct {
 
 const (
 	ConfigBlobCacheIgnoreMissingSubmodules = "BlobCache.IgnoreMissingSubmodules"
+	DependencyBlobCache                    = "blob_cache"
 )
 
 func (cache *BlobCache) Name() string {
@@ -27,12 +28,12 @@ func (cache *BlobCache) Name() string {
 }
 
 func (cache *BlobCache) Provides() []string {
-	arr := [...]string{"blob_cache"}
+	arr := [...]string{DependencyBlobCache}
 	return arr[:]
 }
 
 func (cache *BlobCache) Requires() []string {
-	arr := [...]string{"changes"}
+	arr := [...]string{DependencyTreeChanges}
 	return arr[:]
 }
 
@@ -60,7 +61,7 @@ func (cache *BlobCache) Initialize(repository *git.Repository) {
 
 func (self *BlobCache) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
 	commit := deps["commit"].(*object.Commit)
-	changes := deps["changes"].(object.Changes)
+	changes := deps[DependencyTreeChanges].(object.Changes)
 	cache := map[plumbing.Hash]*object.Blob{}
 	newCache := map[plumbing.Hash]*object.Blob{}
 	for _, change := range changes {
@@ -115,7 +116,7 @@ func (self *BlobCache) Consume(deps map[string]interface{}) (map[string]interfac
 		}
 	}
 	self.cache = newCache
-	return map[string]interface{}{"blob_cache": cache}, nil
+	return map[string]interface{}{DependencyBlobCache: cache}, nil
 }
 
 type FileGetter func(path string) (*object.File, error)
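
Most of the changes in this PR are the same literal-to-constant swap shown above. A tiny, self-contained illustration of why it helps (hypothetical stand-in code, not hercules itself): an exported constant turns a misspelled dependency key into a compile error instead of a silent nil lookup at run time.

```go
package main

import "fmt"

// Hypothetical stand-ins for the exported dependency name constants.
const (
	DependencyBlobCache   = "blob_cache"
	DependencyTreeChanges = "changes"
)

func consume(deps map[string]interface{}) {
	// deps["blob_cahce"] (typo) compiles and silently yields nil;
	// deps[DependencyBlobCahce] (typo) does not compile at all.
	cache := deps[DependencyBlobCache]
	fmt.Println("blob cache present:", cache != nil)
}

func main() {
	consume(map[string]interface{}{DependencyBlobCache: struct{}{}})
}
```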

+ 11 - 11
blob_cache_test.go

@@ -34,7 +34,7 @@ func TestBlobCacheMetadata(t *testing.T) {
 	cache := fixtureBlobCache()
 	assert.Equal(t, cache.Name(), "BlobCache")
 	assert.Equal(t, len(cache.Provides()), 1)
-	assert.Equal(t, cache.Provides()[0], "blob_cache")
+	assert.Equal(t, cache.Provides()[0], DependencyBlobCache)
 	assert.Equal(t, len(cache.Requires()), 1)
 	changes := &TreeDiff{}
 	assert.Equal(t, cache.Requires()[0], changes.Provides()[0])
@@ -80,11 +80,11 @@ func TestBlobCacheConsumeModification(t *testing.T) {
 	}}
 	deps := map[string]interface{}{}
 	deps["commit"] = commit
-	deps["changes"] = changes
+	deps[DependencyTreeChanges] = changes
 	result, err := fixtureBlobCache().Consume(deps)
 	assert.Nil(t, err)
 	assert.Equal(t, len(result), 1)
-	cacheIface, exists := result["blob_cache"]
+	cacheIface, exists := result[DependencyBlobCache]
 	assert.True(t, exists)
 	cache := cacheIface.(map[plumbing.Hash]*object.Blob)
 	assert.Equal(t, len(cache), 2)
@@ -126,11 +126,11 @@ func TestBlobCacheConsumeInsertionDeletion(t *testing.T) {
 	}
 	deps := map[string]interface{}{}
 	deps["commit"] = commit
-	deps["changes"] = changes
+	deps[DependencyTreeChanges] = changes
 	result, err := fixtureBlobCache().Consume(deps)
 	assert.Nil(t, err)
 	assert.Equal(t, len(result), 1)
-	cacheIface, exists := result["blob_cache"]
+	cacheIface, exists := result[DependencyBlobCache]
 	assert.True(t, exists)
 	cache := cacheIface.(map[plumbing.Hash]*object.Blob)
 	assert.Equal(t, len(cache), 2)
@@ -153,7 +153,7 @@ func TestBlobCacheConsumeNoAction(t *testing.T) {
 	changes[0] = &object.Change{From: object.ChangeEntry{}, To: object.ChangeEntry{}}
 	deps := map[string]interface{}{}
 	deps["commit"] = commit
-	deps["changes"] = changes
+	deps[DependencyTreeChanges] = changes
 	result, err := fixtureBlobCache().Consume(deps)
 	assert.Nil(t, result)
 	assert.NotNil(t, err)
@@ -190,7 +190,7 @@ func TestBlobCacheConsumeBadHashes(t *testing.T) {
 	}}
 	deps := map[string]interface{}{}
 	deps["commit"] = commit
-	deps["changes"] = changes
+	deps[DependencyTreeChanges] = changes
 	result, err := fixtureBlobCache().Consume(deps)
 	assert.Nil(t, result)
 	assert.NotNil(t, err)
@@ -237,7 +237,7 @@ func TestBlobCacheConsumeInvalidHash(t *testing.T) {
 	}}
 	deps := map[string]interface{}{}
 	deps["commit"] = commit
-	deps["changes"] = changes
+	deps[DependencyTreeChanges] = changes
 	result, err := fixtureBlobCache().Consume(deps)
 	assert.Nil(t, result)
 	assert.NotNil(t, err)
@@ -296,11 +296,11 @@ func TestBlobCacheDeleteInvalidBlob(t *testing.T) {
 	}
 	deps := map[string]interface{}{}
 	deps["commit"] = commit
-	deps["changes"] = changes
+	deps[DependencyTreeChanges] = changes
 	result, err := fixtureBlobCache().Consume(deps)
 	assert.Nil(t, err)
 	assert.Equal(t, len(result), 1)
-	cacheIface, exists := result["blob_cache"]
+	cacheIface, exists := result[DependencyBlobCache]
 	assert.True(t, exists)
 	cache := cacheIface.(map[plumbing.Hash]*object.Blob)
 	assert.Equal(t, len(cache), 1)
@@ -327,7 +327,7 @@ func TestBlobCacheInsertInvalidBlob(t *testing.T) {
 	}
 	deps := map[string]interface{}{}
 	deps["commit"] = commit
-	deps["changes"] = changes
+	deps[DependencyTreeChanges] = changes
 	result, err := fixtureBlobCache().Consume(deps)
 	assert.NotNil(t, err)
 	assert.Equal(t, len(result), 0)

+ 7 - 6
burndown.go

@@ -96,7 +96,8 @@ func (analyser *BurndownAnalysis) Provides() []string {
 }
 
 func (analyser *BurndownAnalysis) Requires() []string {
-	arr := [...]string{"file_diff", "changes", "blob_cache", "day", "author"}
+	arr := [...]string{
+		DependencyFileDiff, DependencyTreeChanges, DependencyBlobCache, DependencyDay, DependencyAuthor}
 	return arr[:]
 }
 
@@ -191,17 +192,17 @@ func (analyser *BurndownAnalysis) Consume(deps map[string]interface{}) (map[stri
 	if sampling == 0 {
 		sampling = 1
 	}
-	author := deps["author"].(int)
-	analyser.day = deps["day"].(int)
+	author := deps[DependencyAuthor].(int)
+	analyser.day = deps[DependencyDay].(int)
 	delta := (analyser.day / sampling) - (analyser.previousDay / sampling)
 	if delta > 0 {
 		analyser.previousDay = analyser.day
 		gs, fss, pss := analyser.groupStatus()
 		analyser.updateHistories(gs, fss, pss, delta)
 	}
-	cache := deps["blob_cache"].(map[plumbing.Hash]*object.Blob)
-	treeDiffs := deps["changes"].(object.Changes)
-	fileDiffs := deps["file_diff"].(map[string]FileDiffData)
+	cache := deps[DependencyBlobCache].(map[plumbing.Hash]*object.Blob)
+	treeDiffs := deps[DependencyTreeChanges].(object.Changes)
+	fileDiffs := deps[DependencyFileDiff].(map[string]FileDiffData)
 	for _, change := range treeDiffs {
 		action, err := change.Action()
 		if err != nil {

+ 36 - 36
burndown_test.go

@@ -18,7 +18,7 @@ func TestBurndownMeta(t *testing.T) {
 	burndown := BurndownAnalysis{}
 	assert.Equal(t, burndown.Name(), "Burndown")
 	assert.Equal(t, len(burndown.Provides()), 0)
-	required := [...]string{"file_diff", "changes", "blob_cache", "day", "author"}
+	required := [...]string{DependencyFileDiff, DependencyTreeChanges, DependencyBlobCache, DependencyDay, DependencyAuthor}
 	for _, name := range required {
 		assert.Contains(t, burndown.Requires(), name)
 	}
@@ -105,8 +105,8 @@ func TestBurndownConsumeFinalize(t *testing.T) {
 	deps := map[string]interface{}{}
 
 	// stage 1
-	deps["author"] = 0
-	deps["day"] = 0
+	deps[DependencyAuthor] = 0
+	deps[DependencyDay] = 0
 	cache := map[plumbing.Hash]*object.Blob{}
 	hash := plumbing.NewHash("291286b4ac41952cbd1389fda66420ec03c1a9fe")
 	cache[hash], _ = testRepository.BlobObject(hash)
@@ -116,7 +116,7 @@ func TestBurndownConsumeFinalize(t *testing.T) {
 	cache[hash], _ = testRepository.BlobObject(hash)
 	hash = plumbing.NewHash("dc248ba2b22048cc730c571a748e8ffcf7085ab9")
 	cache[hash], _ = testRepository.BlobObject(hash)
-	deps["blob_cache"] = cache
+	deps[DependencyBlobCache] = cache
 	changes := make(object.Changes, 3)
 	treeFrom, _ := testRepository.TreeObject(plumbing.NewHash(
 		"a1eb2ea76eb7f9bfbde9b243861474421000eb96"))
@@ -159,11 +159,11 @@ func TestBurndownConsumeFinalize(t *testing.T) {
 		},
 	},
 	}
-	deps["changes"] = changes
+	deps[DependencyTreeChanges] = changes
 	fd := fixtureFileDiff()
 	result, err := fd.Consume(deps)
 	assert.Nil(t, err)
-	deps["file_diff"] = result["file_diff"]
+	deps[DependencyFileDiff] = result[DependencyFileDiff]
 	result, err = burndown.Consume(deps)
 	assert.Nil(t, result)
 	assert.Nil(t, err)
@@ -191,8 +191,8 @@ func TestBurndownConsumeFinalize(t *testing.T) {
 
 	// stage 2
 	// 2b1ed978194a94edeabbca6de7ff3b5771d4d665
-	deps["author"] = 1
-	deps["day"] = 30
+	deps[DependencyAuthor] = 1
+	deps[DependencyDay] = 30
 	cache = map[plumbing.Hash]*object.Blob{}
 	hash = plumbing.NewHash("291286b4ac41952cbd1389fda66420ec03c1a9fe")
 	cache[hash], _ = testRepository.BlobObject(hash)
@@ -204,7 +204,7 @@ func TestBurndownConsumeFinalize(t *testing.T) {
 	cache[hash], _ = testRepository.BlobObject(hash)
 	hash = plumbing.NewHash("f7d918ec500e2f925ecde79b51cc007bac27de72")
 	cache[hash], _ = testRepository.BlobObject(hash)
-	deps["blob_cache"] = cache
+	deps[DependencyBlobCache] = cache
 	changes = make(object.Changes, 3)
 	treeFrom, _ = testRepository.TreeObject(plumbing.NewHash(
 		"96c6ece9b2f3c7c51b83516400d278dea5605100"))
@@ -256,11 +256,11 @@ func TestBurndownConsumeFinalize(t *testing.T) {
 		},
 	}, To: object.ChangeEntry{},
 	}
-	deps["changes"] = changes
+	deps[DependencyTreeChanges] = changes
 	fd = fixtureFileDiff()
 	result, err = fd.Consume(deps)
 	assert.Nil(t, err)
-	deps["file_diff"] = result["file_diff"]
+	deps[DependencyFileDiff] = result[DependencyFileDiff]
 	result, err = burndown.Consume(deps)
 	assert.Nil(t, result)
 	assert.Nil(t, err)
@@ -325,8 +325,8 @@ func TestBurndownSerialize(t *testing.T) {
 	burndown.Initialize(testRepository)
 	deps := map[string]interface{}{}
 	// stage 1
-	deps["author"] = 0
-	deps["day"] = 0
+	deps[DependencyAuthor] = 0
+	deps[DependencyDay] = 0
 	cache := map[plumbing.Hash]*object.Blob{}
 	hash := plumbing.NewHash("291286b4ac41952cbd1389fda66420ec03c1a9fe")
 	cache[hash], _ = testRepository.BlobObject(hash)
@@ -336,7 +336,7 @@ func TestBurndownSerialize(t *testing.T) {
 	cache[hash], _ = testRepository.BlobObject(hash)
 	hash = plumbing.NewHash("dc248ba2b22048cc730c571a748e8ffcf7085ab9")
 	cache[hash], _ = testRepository.BlobObject(hash)
-	deps["blob_cache"] = cache
+	deps[DependencyBlobCache] = cache
 	changes := make(object.Changes, 3)
 	treeFrom, _ := testRepository.TreeObject(plumbing.NewHash(
 		"a1eb2ea76eb7f9bfbde9b243861474421000eb96"))
@@ -379,16 +379,16 @@ func TestBurndownSerialize(t *testing.T) {
 		},
 	},
 	}
-	deps["changes"] = changes
+	deps[DependencyTreeChanges] = changes
 	fd := fixtureFileDiff()
 	result, _ := fd.Consume(deps)
-	deps["file_diff"] = result["file_diff"]
+	deps[DependencyFileDiff] = result[DependencyFileDiff]
 	burndown.Consume(deps)
 
 	// stage 2
 	// 2b1ed978194a94edeabbca6de7ff3b5771d4d665
-	deps["author"] = 1
-	deps["day"] = 30
+	deps[DependencyAuthor] = 1
+	deps[DependencyDay] = 30
 	cache = map[plumbing.Hash]*object.Blob{}
 	hash = plumbing.NewHash("291286b4ac41952cbd1389fda66420ec03c1a9fe")
 	cache[hash], _ = testRepository.BlobObject(hash)
@@ -400,7 +400,7 @@ func TestBurndownSerialize(t *testing.T) {
 	cache[hash], _ = testRepository.BlobObject(hash)
 	hash = plumbing.NewHash("f7d918ec500e2f925ecde79b51cc007bac27de72")
 	cache[hash], _ = testRepository.BlobObject(hash)
-	deps["blob_cache"] = cache
+	deps[DependencyBlobCache] = cache
 	changes = make(object.Changes, 3)
 	treeFrom, _ = testRepository.TreeObject(plumbing.NewHash(
 		"96c6ece9b2f3c7c51b83516400d278dea5605100"))
@@ -452,10 +452,10 @@ func TestBurndownSerialize(t *testing.T) {
 		},
 	}, To: object.ChangeEntry{},
 	}
-	deps["changes"] = changes
+	deps[DependencyTreeChanges] = changes
 	fd = fixtureFileDiff()
 	result, _ = fd.Consume(deps)
-	deps["file_diff"] = result["file_diff"]
+	deps[DependencyFileDiff] = result[DependencyFileDiff]
 	people := [...]string{"one@srcd", "two@srcd"}
 	burndown.reversedPeopleDict = people[:]
 	burndown.Consume(deps)
@@ -679,12 +679,12 @@ func TestBurndownAddMatrixCrazy(t *testing.T) {
 	*/
 	addBurndownMatrix(added, 5, 3, daily, 0)
 	/*
-	for _, row := range daily {
-	  for _, v := range row {
-		  fmt.Print(v, " ")
-	  }
-		fmt.Println()
-	}
+		for _, row := range daily {
+		  for _, v := range row {
+			  fmt.Print(v, " ")
+		  }
+			fmt.Println()
+		}
 	*/
 	// check pinned points
 	for y := 0; y < 5; y++ {
@@ -753,14 +753,14 @@ func TestBurndownAddMatrixNaNs(t *testing.T) {
 		  12 20 25 40
 	*/
 	addBurndownMatrix(added, 4, 4, daily, 0)
-  /*
-	for _, row := range daily {
-	  for _, v := range row {
-		  fmt.Print(v, " ")
-	  }
-		fmt.Println()
-	}
-  */
+	/*
+		for _, row := range daily {
+		  for _, v := range row {
+			  fmt.Print(v, " ")
+		  }
+			fmt.Println()
+		}
+	*/
 	// check pinned points
 	for y := 0; y < 4; y++ {
 		for x := 0; x < 4; x++ {
@@ -777,7 +777,7 @@ func TestBurndownAddMatrixNaNs(t *testing.T) {
 			assert.Zero(t, daily[y][x])
 		}
 		var prev float32
-		for y := x-4; y < x; y++ {
+		for y := x - 4; y < x; y++ {
 			if y < 0 {
 				continue
 			}

+ 11 - 6
contrib/_plugin_example/churn_analysis.go

@@ -68,7 +68,12 @@ func (churn *ChurnAnalysis) Provides() []string {
 // day - number of days since start for each commit
 // author - author of the commit
 func (churn *ChurnAnalysis) Requires() []string {
-	arr := [...]string{"file_diff", "changes", "blob_cache", "day", "author"}
+	arr := [...]string{
+		hercules.DependencyFileDiff,
+		hercules.DependencyTreeChanges,
+		hercules.DependencyBlobCache,
+		hercules.DependencyDay,
+		hercules.DependencyAuthor}
 	return arr[:]
 }
 
@@ -107,11 +112,11 @@ func (churn *ChurnAnalysis) Initialize(repository *git.Repository) {
 }
 
 func (churn *ChurnAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
-	fileDiffs := deps["file_diff"].(map[string]hercules.FileDiffData)
-	treeDiffs := deps["changes"].(object.Changes)
-	cache := deps["blob_cache"].(map[plumbing.Hash]*object.Blob)
-	day := deps["day"].(int)
-	author := deps["author"].(int)
+	fileDiffs := deps[hercules.DependencyFileDiff].(map[string]hercules.FileDiffData)
+	treeDiffs := deps[hercules.DependencyTreeChanges].(object.Changes)
+	cache := deps[hercules.DependencyBlobCache].(map[plumbing.Hash]*object.Blob)
+	day := deps[hercules.DependencyDay].(int)
+	author := deps[hercules.DependencyAuthor].(int)
 	for _, change := range treeDiffs {
 		action, err := change.Action()
 		if err != nil {

+ 3 - 3
couples.go

@@ -46,7 +46,7 @@ func (couples *CouplesAnalysis) Provides() []string {
 }
 
 func (couples *CouplesAnalysis) Requires() []string {
-	arr := [...]string{"author", "changes"}
+	arr := [...]string{DependencyAuthor, DependencyTreeChanges}
 	return arr[:]
 }
 
@@ -75,12 +75,12 @@ func (couples *CouplesAnalysis) Initialize(repository *git.Repository) {
 }
 
 func (couples *CouplesAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
-	author := deps["author"].(int)
+	author := deps[DependencyAuthor].(int)
 	if author == MISSING_AUTHOR {
 		author = couples.PeopleNumber
 	}
 	couples.people_commits[author] += 1
-	tree_diff := deps["changes"].(object.Changes)
+	tree_diff := deps[DependencyTreeChanges].(object.Changes)
 	context := make([]string, 0)
 	deleteFile := func(name string) {
 		// we do not remove the file from people - the context does not expire

+ 16 - 16
couples_test.go

@@ -24,8 +24,8 @@ func TestCouplesMeta(t *testing.T) {
 	assert.Equal(t, c.Name(), "Couples")
 	assert.Equal(t, len(c.Provides()), 0)
 	assert.Equal(t, len(c.Requires()), 2)
-	assert.Equal(t, c.Requires()[0], "author")
-	assert.Equal(t, c.Requires()[1], "changes")
+	assert.Equal(t, c.Requires()[0], DependencyAuthor)
+	assert.Equal(t, c.Requires()[1], DependencyTreeChanges)
 	assert.Equal(t, c.Flag(), "couples")
 	assert.Len(t, c.ListConfigurationOptions(), 0)
 }
@@ -78,16 +78,16 @@ func generateChanges(names ...string) object.Changes {
 func TestCouplesConsumeFinalize(t *testing.T) {
 	c := fixtureCouples()
 	deps := map[string]interface{}{}
-	deps["author"] = 0
-	deps["changes"] = generateChanges("+two", "+four", "+six")
+	deps[DependencyAuthor] = 0
+	deps[DependencyTreeChanges] = generateChanges("+two", "+four", "+six")
 	c.Consume(deps)
-	deps["changes"] = generateChanges("+one", "-two", "=three", ">four>five")
+	deps[DependencyTreeChanges] = generateChanges("+one", "-two", "=three", ">four>five")
 	c.Consume(deps)
-	deps["author"] = 1
-	deps["changes"] = generateChanges("=one", "=three", "-six")
+	deps[DependencyAuthor] = 1
+	deps[DependencyTreeChanges] = generateChanges("=one", "=three", "-six")
 	c.Consume(deps)
-	deps["author"] = 2
-	deps["changes"] = generateChanges("=five")
+	deps[DependencyAuthor] = 2
+	deps[DependencyTreeChanges] = generateChanges("=five")
 	c.Consume(deps)
 	assert.Equal(t, len(c.people[0]), 5)
 	assert.Equal(t, c.people[0]["one"], 1)
@@ -170,16 +170,16 @@ func TestCouplesSerialize(t *testing.T) {
 	c.Configure(facts)
 	assert.Equal(t, c.PeopleNumber, 3)
 	deps := map[string]interface{}{}
-	deps["author"] = 0
-	deps["changes"] = generateChanges("+two", "+four", "+six")
+	deps[DependencyAuthor] = 0
+	deps[DependencyTreeChanges] = generateChanges("+two", "+four", "+six")
 	c.Consume(deps)
-	deps["changes"] = generateChanges("+one", "-two", "=three", ">four>five")
+	deps[DependencyTreeChanges] = generateChanges("+one", "-two", "=three", ">four>five")
 	c.Consume(deps)
-	deps["author"] = 1
-	deps["changes"] = generateChanges("=one", "=three", "-six")
+	deps[DependencyAuthor] = 1
+	deps[DependencyTreeChanges] = generateChanges("=one", "=three", "-six")
 	c.Consume(deps)
-	deps["author"] = 2
-	deps["changes"] = generateChanges("=five")
+	deps[DependencyAuthor] = 2
+	deps[DependencyTreeChanges] = generateChanges("=five")
 	c.Consume(deps)
 	result := c.Finalize().(CouplesResult)
 	buffer := &bytes.Buffer{}

+ 6 - 2
day.go

@@ -12,12 +12,16 @@ type DaysSinceStart struct {
 	previousDay int
 }
 
+const (
+	DependencyDay = "day"
+)
+
 func (days *DaysSinceStart) Name() string {
 	return "DaysSinceStart"
 }
 
 func (days *DaysSinceStart) Provides() []string {
-	arr := [...]string{"day"}
+	arr := [...]string{DependencyDay}
 	return arr[:]
 }
 
@@ -51,7 +55,7 @@ func (days *DaysSinceStart) Consume(deps map[string]interface{}) (map[string]int
 		day = days.previousDay
 	}
 	days.previousDay = day
-	return map[string]interface{}{"day": day}, nil
+	return map[string]interface{}{DependencyDay: day}, nil
 }
 
 func init() {

+ 6 - 6
day_test.go

@@ -17,7 +17,7 @@ func TestDaysSinceStartMeta(t *testing.T) {
 	dss := fixtureDaysSinceStart()
 	assert.Equal(t, dss.Name(), "DaysSinceStart")
 	assert.Equal(t, len(dss.Provides()), 1)
-	assert.Equal(t, dss.Provides()[0], "day")
+	assert.Equal(t, dss.Provides()[0], DependencyDay)
 	assert.Equal(t, len(dss.Requires()), 0)
 	assert.Len(t, dss.ListConfigurationOptions(), 0)
 	dss.Configure(nil)
@@ -42,7 +42,7 @@ func TestDaysSinceStartConsume(t *testing.T) {
 	deps["index"] = 0
 	res, err := dss.Consume(deps)
 	assert.Nil(t, err)
-	assert.Equal(t, res["day"].(int), 0)
+	assert.Equal(t, res[DependencyDay].(int), 0)
 	assert.Equal(t, dss.previousDay, 0)
 	assert.Equal(t, dss.day0.Hour(), 1)   // 18 UTC+1
 	assert.Equal(t, dss.day0.Minute(), 0) // 30
@@ -54,7 +54,7 @@ func TestDaysSinceStartConsume(t *testing.T) {
 	deps["index"] = 10
 	res, err = dss.Consume(deps)
 	assert.Nil(t, err)
-	assert.Equal(t, res["day"].(int), 1)
+	assert.Equal(t, res[DependencyDay].(int), 1)
 	assert.Equal(t, dss.previousDay, 1)
 
 	commit, _ = testRepository.CommitObject(plumbing.NewHash(
@@ -63,7 +63,7 @@ func TestDaysSinceStartConsume(t *testing.T) {
 	deps["index"] = 20
 	res, err = dss.Consume(deps)
 	assert.Nil(t, err)
-	assert.Equal(t, res["day"].(int), 1)
+	assert.Equal(t, res[DependencyDay].(int), 1)
 	assert.Equal(t, dss.previousDay, 1)
 
 	commit, _ = testRepository.CommitObject(plumbing.NewHash(
@@ -72,7 +72,7 @@ func TestDaysSinceStartConsume(t *testing.T) {
 	deps["index"] = 20
 	res, err = dss.Consume(deps)
 	assert.Nil(t, err)
-	assert.Equal(t, res["day"].(int), 2)
+	assert.Equal(t, res[DependencyDay].(int), 2)
 	assert.Equal(t, dss.previousDay, 2)
 
 	commit, _ = testRepository.CommitObject(plumbing.NewHash(
@@ -81,6 +81,6 @@ func TestDaysSinceStartConsume(t *testing.T) {
 	deps["index"] = 30
 	res, err = dss.Consume(deps)
 	assert.Nil(t, err)
-	assert.Equal(t, res["day"].(int), 2)
+	assert.Equal(t, res[DependencyDay].(int), 2)
 	assert.Equal(t, dss.previousDay, 2)
 }

+ 28 - 7
diff.go

@@ -15,8 +15,15 @@ import (
 
 // FileDiff calculates the difference of files which were modified.
 type FileDiff struct {
+	CleanupDisabled bool
 }
 
+const (
+	ConfigFileDiffDisableCleanup = "FileDiff.NoCleanup"
+
+	DependencyFileDiff = "file_diff"
+)
+
 type FileDiffData struct {
 	OldLinesOfCode int
 	NewLinesOfCode int
@@ -28,27 +35,38 @@ func (diff *FileDiff) Name() string {
 }
 
 func (diff *FileDiff) Provides() []string {
-	arr := [...]string{"file_diff"}
+	arr := [...]string{DependencyFileDiff}
 	return arr[:]
 }
 
 func (diff *FileDiff) Requires() []string {
-	arr := [...]string{"changes", "blob_cache"}
+	arr := [...]string{DependencyTreeChanges, DependencyBlobCache}
 	return arr[:]
 }
 
 func (diff *FileDiff) ListConfigurationOptions() []ConfigurationOption {
-	return []ConfigurationOption{}
+	options := [...]ConfigurationOption{{
+		Name:        ConfigFileDiffDisableCleanup,
+		Description: "Do not apply additional heuristics to improve diffs.",
+		Flag:        "no-diff-cleanup",
+		Type:        BoolConfigurationOption,
+		Default:     false},
+	}
+	return options[:]
 }
 
-func (diff *FileDiff) Configure(facts map[string]interface{}) {}
+func (diff *FileDiff) Configure(facts map[string]interface{}) {
+	if val, exists := facts[ConfigFileDiffDisableCleanup].(bool); exists {
+		diff.CleanupDisabled = val
+	}
+}
 
 func (diff *FileDiff) Initialize(repository *git.Repository) {}
 
 func (diff *FileDiff) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
 	result := map[string]FileDiffData{}
-	cache := deps["blob_cache"].(map[plumbing.Hash]*object.Blob)
-	tree_diff := deps["changes"].(object.Changes)
+	cache := deps[DependencyBlobCache].(map[plumbing.Hash]*object.Blob)
+	tree_diff := deps[DependencyTreeChanges].(object.Changes)
 	for _, change := range tree_diff {
 		action, err := change.Action()
 		if err != nil {
@@ -71,6 +89,9 @@ func (diff *FileDiff) Consume(deps map[string]interface{}) (map[string]interface
 			dmp := diffmatchpatch.New()
 			src, dst, _ := dmp.DiffLinesToRunes(str_from, str_to)
 			diffs := dmp.DiffMainRunes(src, dst, false)
+			if !diff.CleanupDisabled {
+				diffs = dmp.DiffCleanupSemanticLossless(diffs)
+			}
 			result[change.To.Name] = FileDiffData{
 				OldLinesOfCode: len(src),
 				NewLinesOfCode: len(dst),
@@ -80,7 +101,7 @@ func (diff *FileDiff) Consume(deps map[string]interface{}) (map[string]interface
 			continue
 		}
 	}
-	return map[string]interface{}{"file_diff": result}, nil
+	return map[string]interface{}{DependencyFileDiff: result}, nil
 }
 
 func CountLines(file *object.Blob) (int, error) {
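
For context, a small standalone sketch of the cleanup step that the new ConfigFileDiffDisableCleanup option toggles. DiffCleanupSemanticLossless from github.com/sergi/go-diff/diffmatchpatch only shifts edit boundaries to more natural positions; the texts reconstructed from the diff stay identical. The input strings here are made up for illustration.

```go
package main

import (
	"fmt"

	"github.com/sergi/go-diff/diffmatchpatch"
)

func main() {
	dmp := diffmatchpatch.New()
	// A raw character-level diff often puts edit boundaries mid-token.
	diffs := dmp.DiffMain("private void close()", "public void close()", false)
	// The lossless cleanup only moves boundaries (e.g. to word edges);
	// the reconstructed texts are unchanged.
	cleaned := dmp.DiffCleanupSemanticLossless(diffs)
	fmt.Println(dmp.DiffText2(cleaned) == "public void close()") // true
	for _, d := range cleaned {
		fmt.Printf("%v %q\n", d.Type, d.Text)
	}
}
```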

+ 116 - 10
diff_refiner.go

@@ -1,6 +1,10 @@
 package hercules
 
 import (
+	"unicode/utf8"
+
+	"github.com/sergi/go-diff/diffmatchpatch"
+	"gopkg.in/bblfsh/sdk.v1/uast"
 	"gopkg.in/src-d/go-git.v4"
 )
 
@@ -12,17 +16,17 @@ func (ref *FileDiffRefiner) Name() string {
 }
 
 func (ref *FileDiffRefiner) Provides() []string {
-	arr := [...]string{"file_diff"}
+	arr := [...]string{DependencyFileDiff}
 	return arr[:]
 }
 
 func (ref *FileDiffRefiner) Requires() []string {
-	arr := [...]string{"file_diff", "changed_uasts"}
+	arr := [...]string{DependencyFileDiff, DependencyUastChanges}
 	return arr[:]
 }
 
 func (ref *FileDiffRefiner) Features() []string {
-	arr := [...]string{"uast"}
+	arr := [...]string{FeatureUast}
 	return arr[:]
 }
 
@@ -36,20 +40,122 @@ func (ref *FileDiffRefiner) Initialize(repository *git.Repository) {
 }
 
 func (ref *FileDiffRefiner) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
-	changesList := deps["changed_uasts"].([]UASTChange)
+	changesList := deps[DependencyUastChanges].([]UASTChange)
 	changes := map[string]UASTChange{}
 	for _, change := range changesList {
 		if change.Before != nil && change.After != nil {
 			changes[change.Change.To.Name] = change
 		}
 	}
-	diffs := deps["file_diff"].(map[string]FileDiffData)
-	for fileName, _ /*diff*/ := range diffs {
-		_ /*change*/ = changes[fileName]
-		// TODO: scan diff line by line
-	}
+	diffs := deps[DependencyFileDiff].(map[string]FileDiffData)
 	result := map[string]FileDiffData{}
-	return map[string]interface{}{"file_diff": result}, nil
+	for fileName, oldDiff := range diffs {
+		suspicious := map[int][2]int{}
+		line := 0
+		for i, diff := range oldDiff.Diffs {
+			if i == len(oldDiff.Diffs)-1 {
+				break
+			}
+			if diff.Type == diffmatchpatch.DiffInsert &&
+				oldDiff.Diffs[i+1].Type == diffmatchpatch.DiffEqual {
+				matched := 0
+				runesAdded := []rune(diff.Text)
+				runesEqual := []rune(oldDiff.Diffs[i+1].Text)
+				for ; matched < len(runesAdded) && matched < len(runesEqual) &&
+					runesAdded[matched] == runesEqual[matched]; matched++ {
+				}
+				if matched > 0 {
+					suspicious[i] = [2]int{line, matched}
+				}
+			}
+			if diff.Type != diffmatchpatch.DiffDelete {
+				line += utf8.RuneCountInString(diff.Text)
+			}
+		}
+		if len(suspicious) == 0 {
+			result[fileName] = oldDiff
+			continue
+		}
+		uastChange := changes[fileName]
+		line2node := make([][]*uast.Node, oldDiff.NewLinesOfCode)
+		visitEachNode(uastChange.After, func(node *uast.Node) {
+			if node.StartPosition != nil && node.EndPosition != nil {
+				for l := node.StartPosition.Line; l <= node.EndPosition.Line; l++ {
+					nodes := line2node[l-1] // line starts with 1
+					if nodes == nil {
+						nodes = []*uast.Node{}
+					}
+					line2node[l-1] = append(nodes, node)
+				}
+			}
+		})
+		newDiff := FileDiffData{
+			OldLinesOfCode: oldDiff.OldLinesOfCode,
+			NewLinesOfCode: oldDiff.NewLinesOfCode,
+			Diffs:          []diffmatchpatch.Diff{},
+		}
+		skipNext := false
+		for i, diff := range oldDiff.Diffs {
+			if skipNext {
+				skipNext = false
+				continue
+			}
+			info, exists := suspicious[i]
+			if !exists {
+				newDiff.Diffs = append(newDiff.Diffs, diff)
+				continue
+			}
+			line := info[0]
+			matched := info[1]
+			size := utf8.RuneCountInString(diff.Text)
+			n1 := countNodesInInterval(line2node, line, line+size)
+			n2 := countNodesInInterval(line2node, line+matched, line+size+matched)
+			if n1 <= n2 {
+				newDiff.Diffs = append(newDiff.Diffs, diff)
+				continue
+			}
+			skipNext = true
+			runes := []rune(diff.Text)
+			newDiff.Diffs = append(newDiff.Diffs, diffmatchpatch.Diff{
+				Type: diffmatchpatch.DiffEqual, Text: string(runes[:matched]),
+			})
+			newDiff.Diffs = append(newDiff.Diffs, diffmatchpatch.Diff{
+				Type: diffmatchpatch.DiffInsert, Text: string(runes[matched:]) + string(runes[:matched]),
+			})
+			runes = []rune(oldDiff.Diffs[i+1].Text)
+			if len(runes) > matched {
+				newDiff.Diffs = append(newDiff.Diffs, diffmatchpatch.Diff{
+					Type: diffmatchpatch.DiffEqual, Text: string(runes[matched:]),
+				})
+			}
+		}
+		result[fileName] = newDiff
+	}
+	return map[string]interface{}{DependencyFileDiff: result}, nil
+}
+
+// Depth first tree traversal.
+func visitEachNode(root *uast.Node, payload func(*uast.Node)) {
+	queue := []*uast.Node{}
+	queue = append(queue, root)
+	for len(queue) > 0 {
+		node := queue[len(queue)-1]
+		queue = queue[:len(queue)-1]
+		payload(node)
+		for _, child := range node.Children {
+			queue = append(queue, child)
+		}
+	}
+}
+
+func countNodesInInterval(occupiedMap [][]*uast.Node, start, end int) int {
+	nodes := map[*uast.Node]bool{}
+	for i := start; i < end; i++ {
+		for _, node := range occupiedMap[i] {
+			nodes[node] = true
+		}
+	}
+	return len(nodes)
 }
 
 func init() {
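
To make the refinement heuristic concrete, here is a hand-made before/after pair. Note the hedge: in hercules the Diff.Text values are rune-encoded lines produced by DiffLinesToRunes, so plain text lines only stand in for them here, and both slices are constructed by hand rather than produced by the refiner. When an insert shares a prefix with the following equal block and the UAST says the shifted variant overlaps fewer nodes, the refiner re-attributes the prefix to an equal block and rotates the inserted text, keeping both reconstructed texts intact.

```go
package main

import (
	"fmt"

	"github.com/sergi/go-diff/diffmatchpatch"
)

func main() {
	// Ambiguous diff: the insert "b\nc\n" starts with the same line as the
	// following equal block, so the insertion point could sit one line later.
	before := []diffmatchpatch.Diff{
		{Type: diffmatchpatch.DiffEqual, Text: "a\n"},
		{Type: diffmatchpatch.DiffInsert, Text: "b\nc\n"},
		{Type: diffmatchpatch.DiffEqual, Text: "b\nd\n"},
	}
	// Refined shape: the shared prefix becomes an equal block and the insert
	// is rotated ("c\n" + "b\n"), which is what the loop above emits when
	// countNodesInInterval favours the shifted position.
	after := []diffmatchpatch.Diff{
		{Type: diffmatchpatch.DiffEqual, Text: "a\n"},
		{Type: diffmatchpatch.DiffEqual, Text: "b\n"},
		{Type: diffmatchpatch.DiffInsert, Text: "c\nb\n"},
		{Type: diffmatchpatch.DiffEqual, Text: "d\n"},
	}
	dmp := diffmatchpatch.New()
	// Both the old and the new file contents are preserved by the rewrite.
	fmt.Println(dmp.DiffText1(before) == dmp.DiffText1(after)) // true
	fmt.Println(dmp.DiffText2(before) == dmp.DiffText2(after)) // true
}
```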

+ 98 - 0
diff_refiner_test.go

@@ -0,0 +1,98 @@
+package hercules
+
+import (
+	"io/ioutil"
+	"path"
+	"testing"
+	"unicode/utf8"
+
+	"github.com/gogo/protobuf/proto"
+	"github.com/sergi/go-diff/diffmatchpatch"
+	"github.com/stretchr/testify/assert"
+	"gopkg.in/bblfsh/sdk.v1/uast"
+	"gopkg.in/src-d/go-git.v4/plumbing/object"
+)
+
+func fixtureFileDiffRefiner() *FileDiffRefiner {
+	fd := &FileDiffRefiner{}
+	fd.Initialize(testRepository)
+	return fd
+}
+
+func TestFileDiffRefinerMeta(t *testing.T) {
+	fd := fixtureFileDiffRefiner()
+	assert.Equal(t, fd.Name(), "FileDiffRefiner")
+	assert.Equal(t, len(fd.Provides()), 1)
+	assert.Equal(t, fd.Provides()[0], DependencyFileDiff)
+	assert.Equal(t, len(fd.Requires()), 2)
+	assert.Equal(t, fd.Requires()[0], DependencyFileDiff)
+	assert.Equal(t, fd.Requires()[1], DependencyUastChanges)
+	assert.Len(t, fd.ListConfigurationOptions(), 0)
+	fd.Configure(nil)
+	features := fd.Features()
+	assert.Len(t, features, 1)
+	assert.Equal(t, features[0], FeatureUast)
+}
+
+func TestFileDiffRefinerRegistration(t *testing.T) {
+	tp, exists := Registry.registered[(&FileDiffRefiner{}).Name()]
+	assert.True(t, exists)
+	assert.Equal(t, tp.Elem().Name(), "FileDiffRefiner")
+	tps, exists := Registry.provided[(&FileDiffRefiner{}).Provides()[0]]
+	assert.True(t, exists)
+	assert.True(t, len(tps) >= 1)
+	matched := false
+	for _, tp := range tps {
+		matched = matched || tp.Elem().Name() == "FileDiffRefiner"
+	}
+	assert.True(t, matched)
+}
+
+func TestFileDiffRefinerConsume(t *testing.T) {
+	bytes1, err := ioutil.ReadFile(path.Join("test_data", "1.java"))
+	assert.Nil(t, err)
+	bytes2, err := ioutil.ReadFile(path.Join("test_data", "2.java"))
+	assert.Nil(t, err)
+	dmp := diffmatchpatch.New()
+	src, dst, _ := dmp.DiffLinesToRunes(string(bytes1), string(bytes2))
+	state := map[string]interface{}{}
+	fileDiffs := map[string]FileDiffData{}
+	const fileName = "test.java"
+	fileDiffs[fileName] = FileDiffData{
+		OldLinesOfCode: len(src),
+		NewLinesOfCode: len(dst),
+		Diffs:          dmp.DiffMainRunes(src, dst, false),
+	}
+	state[DependencyFileDiff] = fileDiffs
+	uastChanges := make([]UASTChange, 1)
+	loadUast := func(name string) *uast.Node {
+		bytes, err := ioutil.ReadFile(path.Join("test_data", name))
+		assert.Nil(t, err)
+		node := uast.Node{}
+		proto.Unmarshal(bytes, &node)
+		return &node
+	}
+	state[DependencyUastChanges] = uastChanges
+	uastChanges[0] = UASTChange{
+		Change: &object.Change{
+			From: object.ChangeEntry{Name: fileName},
+			To:   object.ChangeEntry{Name: fileName}},
+		Before: loadUast("uast1.pb"), After: loadUast("uast2.pb"),
+	}
+	fd := fixtureFileDiffRefiner()
+	iresult, err := fd.Consume(state)
+	assert.Nil(t, err)
+	result := iresult[DependencyFileDiff].(map[string]FileDiffData)
+	assert.Len(t, result, 1)
+
+	oldDiff := fileDiffs[fileName]
+	newDiff := result[fileName]
+	assert.Equal(t, oldDiff.OldLinesOfCode, newDiff.OldLinesOfCode)
+	assert.Equal(t, oldDiff.NewLinesOfCode, newDiff.NewLinesOfCode)
+	assert.Equal(t, len(oldDiff.Diffs)+1, len(newDiff.Diffs))
+	assert.Equal(t, dmp.DiffText2(oldDiff.Diffs), dmp.DiffText2(newDiff.Diffs))
+	// Some hardcoded length checks
+	assert.Equal(t, utf8.RuneCountInString(newDiff.Diffs[5].Text), 11)
+	assert.Equal(t, utf8.RuneCountInString(newDiff.Diffs[6].Text), 41)
+	assert.Equal(t, utf8.RuneCountInString(newDiff.Diffs[7].Text), 231)
+}

+ 58 - 10
diff_test.go

@@ -20,12 +20,16 @@ func TestFileDiffMeta(t *testing.T) {
 	fd := fixtureFileDiff()
 	assert.Equal(t, fd.Name(), "FileDiff")
 	assert.Equal(t, len(fd.Provides()), 1)
-	assert.Equal(t, fd.Provides()[0], "file_diff")
+	assert.Equal(t, fd.Provides()[0], DependencyFileDiff)
 	assert.Equal(t, len(fd.Requires()), 2)
-	assert.Equal(t, fd.Requires()[0], "changes")
-	assert.Equal(t, fd.Requires()[1], "blob_cache")
-	assert.Len(t, fd.ListConfigurationOptions(), 0)
-	fd.Configure(nil)
+	assert.Equal(t, fd.Requires()[0], DependencyTreeChanges)
+	assert.Equal(t, fd.Requires()[1], DependencyBlobCache)
+	assert.Len(t, fd.ListConfigurationOptions(), 1)
+	assert.Equal(t, fd.ListConfigurationOptions()[0].Name, ConfigFileDiffDisableCleanup)
+	facts := map[string]interface{}{}
+	facts[ConfigFileDiffDisableCleanup] = true
+	fd.Configure(facts)
+	assert.True(t, fd.CleanupDisabled)
 }
 
 func TestFileDiffRegistration(t *testing.T) {
@@ -52,7 +56,7 @@ func TestFileDiffConsume(t *testing.T) {
 	cache[hash], _ = testRepository.BlobObject(hash)
 	hash = plumbing.NewHash("dc248ba2b22048cc730c571a748e8ffcf7085ab9")
 	cache[hash], _ = testRepository.BlobObject(hash)
-	deps["blob_cache"] = cache
+	deps[DependencyBlobCache] = cache
 	changes := make(object.Changes, 3)
 	treeFrom, _ := testRepository.TreeObject(plumbing.NewHash(
 		"a1eb2ea76eb7f9bfbde9b243861474421000eb96"))
@@ -95,10 +99,10 @@ func TestFileDiffConsume(t *testing.T) {
 		},
 	}, To: object.ChangeEntry{},
 	}
-	deps["changes"] = changes
+	deps[DependencyTreeChanges] = changes
 	res, err := fd.Consume(deps)
 	assert.Nil(t, err)
-	diffs := res["file_diff"].(map[string]FileDiffData)
+	diffs := res[DependencyFileDiff].(map[string]FileDiffData)
 	assert.Equal(t, len(diffs), 1)
 	diff := diffs["analyser.go"]
 	assert.Equal(t, diff.OldLinesOfCode, 307)
@@ -129,7 +133,7 @@ func TestFileDiffConsumeInvalidBlob(t *testing.T) {
 	cache[hash], _ = testRepository.BlobObject(hash)
 	hash = plumbing.NewHash("dc248ba2b22048cc730c571a748e8ffcf7085ab9")
 	cache[hash], _ = testRepository.BlobObject(hash)
-	deps["blob_cache"] = cache
+	deps[DependencyBlobCache] = cache
 	changes := make(object.Changes, 1)
 	treeFrom, _ := testRepository.TreeObject(plumbing.NewHash(
 		"a1eb2ea76eb7f9bfbde9b243861474421000eb96"))
@@ -152,7 +156,7 @@ func TestFileDiffConsumeInvalidBlob(t *testing.T) {
 			Hash: plumbing.NewHash("334cde09da4afcb74f8d2b3e6fd6cce61228b485"),
 		},
 	}}
-	deps["changes"] = changes
+	deps[DependencyTreeChanges] = changes
 	res, err := fd.Consume(deps)
 	assert.Nil(t, res)
 	assert.NotNil(t, err)
@@ -227,3 +231,47 @@ notifications:
 	assert.Equal(t, str, "")
 	assert.NotNil(t, err)
 }
+
+func TestFileDiffDarkMagic(t *testing.T) {
+	fd := fixtureFileDiff()
+	deps := map[string]interface{}{}
+	cache := map[plumbing.Hash]*object.Blob{}
+	hash := plumbing.NewHash("448eb3f312849b0ca766063d06b09481c987b309")
+	cache[hash], _ = testRepository.BlobObject(hash) // 1.java
+	hash = plumbing.NewHash("3312c92f3e8bdfbbdb30bccb6acd1b85bc338dfc")
+	cache[hash], _ = testRepository.BlobObject(hash) // 2.java
+	deps[DependencyBlobCache] = cache
+	changes := make(object.Changes, 1)
+	treeFrom, _ := testRepository.TreeObject(plumbing.NewHash(
+		"f02289bfe843388a1bb3c7dea210374082dd86b9"))
+	treeTo, _ := testRepository.TreeObject(plumbing.NewHash(
+		"eca91acf1fd828f20dcb653a061d8c97d965bc6c"))
+	changes[0] = &object.Change{From: object.ChangeEntry{
+		Name: "test.java",
+		Tree: treeFrom,
+		TreeEntry: object.TreeEntry{
+			Name: "test.java",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("448eb3f312849b0ca766063d06b09481c987b309"),
+		},
+	}, To: object.ChangeEntry{
+		Name: "test.java",
+		Tree: treeTo,
+		TreeEntry: object.TreeEntry{
+			Name: "test.java",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("3312c92f3e8bdfbbdb30bccb6acd1b85bc338dfc"),
+		},
+	}}
+	deps[DependencyTreeChanges] = changes
+	res, err := fd.Consume(deps)
+	assert.Nil(t, err)
+	magicDiffs := res[DependencyFileDiff].(map[string]FileDiffData)["test.java"]
+	fd.CleanupDisabled = true
+	res, err = fd.Consume(deps)
+	assert.Nil(t, err)
+	plainDiffs := res[DependencyFileDiff].(map[string]FileDiffData)["test.java"]
+	assert.NotEqual(t, magicDiffs.Diffs, plainDiffs.Diffs)
+	assert.Equal(t, magicDiffs.OldLinesOfCode, plainDiffs.OldLinesOfCode)
+	assert.Equal(t, magicDiffs.NewLinesOfCode, plainDiffs.NewLinesOfCode)
+}

+ 4 - 2
identity.go

@@ -26,6 +26,8 @@ const (
 	FactIdentityDetectorReversedPeopleDict = "IdentityDetector.ReversedPeopleDict"
 	ConfigIdentityDetectorPeopleDictPath   = "IdentityDetector.PeopleDictPath"
 	FactIdentityDetectorPeopleCount        = "IdentityDetector.PeopleCount"
+
+	DependencyAuthor = "author"
 )
 
 func (id *IdentityDetector) Name() string {
@@ -33,7 +35,7 @@ func (id *IdentityDetector) Name() string {
 }
 
 func (id *IdentityDetector) Provides() []string {
-	arr := [...]string{"author"}
+	arr := [...]string{DependencyAuthor}
 	return arr[:]
 }
 
@@ -91,7 +93,7 @@ func (self *IdentityDetector) Consume(deps map[string]interface{}) (map[string]i
 			id = MISSING_AUTHOR
 		}
 	}
-	return map[string]interface{}{"author": id}, nil
+	return map[string]interface{}{DependencyAuthor: id}, nil
 }
 
 func (id *IdentityDetector) LoadPeopleDict(path string) error {

+ 3 - 3
identity_test.go

@@ -35,7 +35,7 @@ func TestIdentityDetectorMeta(t *testing.T) {
 	assert.Equal(t, id.Name(), "IdentityDetector")
 	assert.Equal(t, len(id.Requires()), 0)
 	assert.Equal(t, len(id.Provides()), 1)
-	assert.Equal(t, id.Provides()[0], "author")
+	assert.Equal(t, id.Provides()[0], DependencyAuthor)
 	opts := id.ListConfigurationOptions()
 	assert.Len(t, opts, 1)
 	assert.Equal(t, opts[0].Name, ConfigIdentityDetectorPeopleDictPath)
@@ -135,13 +135,13 @@ func TestIdentityDetectorConsume(t *testing.T) {
 	deps["commit"] = commit
 	res, err := fixtureIdentityDetector().Consume(deps)
 	assert.Nil(t, err)
-	assert.Equal(t, res["author"].(int), 0)
+	assert.Equal(t, res[DependencyAuthor].(int), 0)
 	commit, _ = testRepository.CommitObject(plumbing.NewHash(
 		"8a03b5620b1caa72ec9cb847ea88332621e2950a"))
 	deps["commit"] = commit
 	res, err = fixtureIdentityDetector().Consume(deps)
 	assert.Nil(t, err)
-	assert.Equal(t, res["author"].(int), MISSING_AUTHOR)
+	assert.Equal(t, res[DependencyAuthor].(int), MISSING_AUTHOR)
 }
 
 func TestIdentityDetectorLoadPeopleDict(t *testing.T) {

+ 2 - 2
pipeline_test.go

@@ -336,7 +336,7 @@ func TestPipelineError(t *testing.T) {
 
 func TestPipelineSerialize(t *testing.T) {
 	pipeline := NewPipeline(testRepository)
-	pipeline.SetFeature("uast")
+	pipeline.SetFeature(FeatureUast)
 	pipeline.DeployItem(&BurndownAnalysis{})
 	facts := map[string]interface{}{}
 	facts["Pipeline.DryRun"] = true
@@ -376,7 +376,7 @@ func TestPipelineSerialize(t *testing.T) {
 
 func TestPipelineSerializeNoUast(t *testing.T) {
 	pipeline := NewPipeline(testRepository)
-	// pipeline.SetFeature("uast")
+	// pipeline.SetFeature(FeatureUast)
 	pipeline.DeployItem(&BurndownAnalysis{})
 	facts := map[string]interface{}{}
 	facts["Pipeline.DryRun"] = true

+ 5 - 5
renames.go

@@ -33,12 +33,12 @@ func (ra *RenameAnalysis) Name() string {
 }
 
 func (ra *RenameAnalysis) Provides() []string {
-	arr := [...]string{"changes"}
+	arr := [...]string{DependencyTreeChanges}
 	return arr[:]
 }
 
 func (ra *RenameAnalysis) Requires() []string {
-	arr := [...]string{"blob_cache", "changes"}
+	arr := [...]string{DependencyBlobCache, DependencyTreeChanges}
 	return arr[:]
 }
 
@@ -69,8 +69,8 @@ func (ra *RenameAnalysis) Initialize(repository *git.Repository) {
 }
 
 func (ra *RenameAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
-	changes := deps["changes"].(object.Changes)
-	cache := deps["blob_cache"].(map[plumbing.Hash]*object.Blob)
+	changes := deps[DependencyTreeChanges].(object.Changes)
+	cache := deps[DependencyBlobCache].(map[plumbing.Hash]*object.Blob)
 
 	reduced_changes := make(object.Changes, 0, changes.Len())
 
@@ -176,7 +176,7 @@ func (ra *RenameAnalysis) Consume(deps map[string]interface{}) (map[string]inter
 	for _, blob := range deleted_blobs {
 		reduced_changes = append(reduced_changes, blob.change)
 	}
-	return map[string]interface{}{"changes": reduced_changes}, nil
+	return map[string]interface{}{DependencyTreeChanges: reduced_changes}, nil
 }
 
 func (ra *RenameAnalysis) sizesAreClose(size1 int64, size2 int64) bool {

+ 7 - 7
renames_test.go

@@ -17,10 +17,10 @@ func TestRenameAnalysisMeta(t *testing.T) {
 	ra := fixtureRenameAnalysis()
 	assert.Equal(t, ra.Name(), "RenameAnalysis")
 	assert.Equal(t, len(ra.Provides()), 1)
-	assert.Equal(t, ra.Provides()[0], "changes")
+	assert.Equal(t, ra.Provides()[0], DependencyTreeChanges)
 	assert.Equal(t, len(ra.Requires()), 2)
-	assert.Equal(t, ra.Requires()[0], "blob_cache")
-	assert.Equal(t, ra.Requires()[1], "changes")
+	assert.Equal(t, ra.Requires()[0], DependencyBlobCache)
+	assert.Equal(t, ra.Requires()[1], DependencyTreeChanges)
 	opts := ra.ListConfigurationOptions()
 	assert.Len(t, opts, 1)
 	assert.Equal(t, opts[0].Name, ConfigRenameAnalysisSimilarityThreshold)
@@ -117,17 +117,17 @@ func TestRenameAnalysisConsume(t *testing.T) {
 	hash = plumbing.NewHash("f7d918ec500e2f925ecde79b51cc007bac27de72")
 	cache[hash], _ = testRepository.BlobObject(hash)
 	deps := map[string]interface{}{}
-	deps["blob_cache"] = cache
-	deps["changes"] = changes
+	deps[DependencyBlobCache] = cache
+	deps[DependencyTreeChanges] = changes
 	ra.SimilarityThreshold = 33
 	res, err := ra.Consume(deps)
 	assert.Nil(t, err)
-	renamed := res["changes"].(object.Changes)
+	renamed := res[DependencyTreeChanges].(object.Changes)
 	assert.Equal(t, len(renamed), 2)
 	ra.SimilarityThreshold = 35
 	res, err = ra.Consume(deps)
 	assert.Nil(t, err)
-	renamed = res["changes"].(object.Changes)
+	renamed = res[DependencyTreeChanges].(object.Changes)
 	assert.Equal(t, len(renamed), 3)
 }
 

+ 295 - 0
test_data/1.java

@@ -0,0 +1,295 @@
+/**
+ *    Copyright (C) 2012 ZeroTurnaround LLC <support@zeroturnaround.com>
+ *
+ *    Licensed under the Apache License, Version 2.0 (the "License");
+ *    you may not use this file except in compliance with the License.
+ *    You may obtain a copy of the License at
+ *
+ *        http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *    Unless required by applicable law or agreed to in writing, software
+ *    distributed under the License is distributed on an "AS IS" BASIS,
+ *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *    See the License for the specific language governing permissions and
+ *    limitations under the License.
+ */
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipOutputStream;
+
+import junit.framework.TestCase;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.IOUtils;
+import org.zeroturnaround.zip.ZipEntrySource;
+import org.zeroturnaround.zip.ZipException;
+import org.zeroturnaround.zip.ZipUtil;
+
+public class ZipUtilTest extends TestCase {
+
+  public void testUnpackEntryFromFile() throws IOException {
+    final String name = "foo";
+    final byte[] contents = "bar".getBytes();
+
+    File file = File.createTempFile("temp", null);
+    try {
+      // Create the ZIP file
+      ZipOutputStream zos = new ZipOutputStream(new FileOutputStream(file));
+      try {
+        zos.putNextEntry(new ZipEntry(name));
+        zos.write(contents);
+        zos.closeEntry();
+      }
+      finally {
+        IOUtils.closeQuietly(zos);
+      }
+
+      // Test the ZipUtil
+      byte[] actual = ZipUtil.unpackEntry(file, name);
+      assertNotNull(actual);
+      assertEquals(new String(contents), new String(actual));
+    }
+    // 1
+    
+    // 2
+    
+    // 3
+    finally {
+      FileUtils.deleteQuietly(file);
+    }
+  }
+  
+  public void testUnpackEntryFromStream() throws IOException {
+    final String name = "foo";
+    final byte[] contents = "bar".getBytes();
+
+    File file = File.createTempFile("temp", null);
+    try {
+      // Create the ZIP file
+      ZipOutputStream zos = new ZipOutputStream(new FileOutputStream(file));
+      try {
+        zos.putNextEntry(new ZipEntry(name));
+        zos.write(contents);
+        zos.closeEntry();
+      }
+      finally {
+        IOUtils.closeQuietly(zos);
+      }
+
+      FileInputStream fis = new FileInputStream(file);
+      // Test the ZipUtil
+      byte[] actual = ZipUtil.unpackEntry(fis, name);
+      assertNotNull(actual);
+      assertEquals(new String(contents), new String(actual));
+    }
+    // 1
+    
+    // 2
+    
+    // 3
+    finally {
+      FileUtils.deleteQuietly(file);
+    }
+  }
+
+  public void testDuplicateEntryAtAdd() throws IOException {
+    File src = new File(getClass().getResource("duplicate.zip").getPath());
+
+    File dest = File.createTempFile("temp", null);
+    try {
+      ZipUtil.addEntries(src, new ZipEntrySource[0], dest);
+    }
+    finally {
+      FileUtils.deleteQuietly(dest);
+    }
+  }
+
+  public void testDuplicateEntryAtReplace() throws IOException {
+    File src = new File(getClass().getResource("duplicate.zip").getPath());
+
+    File dest = File.createTempFile("temp", null);
+    try {
+      ZipUtil.replaceEntries(src, new ZipEntrySource[0], dest);
+    }
+    finally {
+      FileUtils.deleteQuietly(dest);
+    }
+  }
+
+  public void testDuplicateEntryAtAddOrReplace() throws IOException {
+    File src = new File(getClass().getResource("duplicate.zip").getPath());
+
+    File dest = File.createTempFile("temp", null);
+    try {
+      ZipUtil.addOrReplaceEntries(src, new ZipEntrySource[0], dest);
+    }
+    finally {
+      FileUtils.deleteQuietly(dest);
+    }
+  }
+
+  public void testUnexplode() throws IOException {
+    File file = File.createTempFile("tempFile", null);
+    File tmpDir = file.getParentFile();
+
+    unexplodeWithException(file, "shouldn't be able to unexplode file that is not a directory");
+    assertTrue("Should be able to delete tmp file", file.delete());
+    unexplodeWithException(file, "shouldn't be able to unexplode file that doesn't exist");
+
+    // create empty tmp dir with the same name as deleted file
+    File dir = new File(tmpDir, file.getName());
+    dir.deleteOnExit();
+    assertTrue("Should be able to create directory with the same name as there was tmp file", dir.mkdir());
+
+    unexplodeWithException(dir, "shouldn't be able to unexplode dir that doesn't contain any files");
+
+    // unexplode should succeed with at least one file in directory
+    File.createTempFile("temp", null, dir);
+    ZipUtil.unexplode(dir);
+
+    assertTrue("zip file should exist with the same name as the directory that was unexploded", dir.exists());
+    assertTrue("unexploding input directory should have produced zip file with the same name", !dir.isDirectory());
+    assertTrue("Should be able to delete zip that was created from directory", dir.delete());
+  }
+
+  public void testPackEntry() throws Exception {
+    File fileToPack = new File(getClass().getResource("TestFile.txt").getPath());
+    File dest = File.createTempFile("temp", null);
+    ZipUtil.packEntry(fileToPack, dest);
+    assertTrue(dest.exists());
+
+    ZipUtil.explode(dest);
+    assertTrue((new File(dest, "TestFile.txt")).exists());
+    // if fails then maybe somebody changed the file contents and did not update
+    // the test
+    assertEquals(108, (new File(dest, "TestFile.txt")).length());
+  }
+
+  public void testPackEntries() throws Exception {
+    File fileToPack = new File(getClass().getResource("TestFile.txt").getPath());
+    File fileToPackII = new File(getClass().getResource("TestFile-II.txt").getPath());
+    File dest = File.createTempFile("temp", null);
+    ZipUtil.packEntries(new File[] { fileToPack, fileToPackII }, dest);
+    assertTrue(dest.exists());
+
+    ZipUtil.explode(dest);
+    assertTrue((new File(dest, "TestFile.txt")).exists());
+    assertTrue((new File(dest, "TestFile-II.txt")).exists());
+    // if fails then maybe somebody changed the file contents and did not update
+    // the test
+    assertEquals(108, (new File(dest, "TestFile.txt")).length());
+    assertEquals(103, (new File(dest, "TestFile-II.txt")).length());
+  }
+
+  public void testZipException() {
+    boolean exceptionThrown = false;
+    try {
+      ZipUtil.pack(new File("nonExistent"), new File("weeheha"));
+    }
+    catch (ZipException e) {
+      exceptionThrown = true;
+    }
+    assertTrue(exceptionThrown);
+  }
+
+  public void testPreserveRoot() throws Exception {
+    File dest = File.createTempFile("temp", null);
+    File parent = new File(getClass().getResource("TestFile.txt").getPath()).getParentFile();
+    ZipUtil.pack(parent, dest, true);
+    ZipUtil.explode(dest);
+    assertTrue((new File(dest, parent.getName())).exists());
+  }
+
+  private void unexplodeWithException(File file, String message) {
+    boolean ok = false;
+    try {
+      ZipUtil.unexplode(file);
+    }
+    catch (Exception e) {
+      ok = true;
+    }
+    assertTrue(message, ok);
+  }
+
+  public void testArchiveEquals() {
+    File src = new File(getClass().getResource("demo.zip").getPath());
+    // byte-by-byte copy
+    File src2 = new File(getClass().getResource("demo-copy.zip").getPath());
+    assertTrue(ZipUtil.archiveEquals(src, src2));
+    
+    // entry by entry copy
+    File src3 = new File(getClass().getResource("demo-copy-II.zip").getPath());
+    assertTrue(ZipUtil.archiveEquals(src, src3));
+  }
+  
+  public void testRepackArchive() throws IOException {
+    File src = new File(getClass().getResource("demo.zip").getPath());
+    File dest = File.createTempFile("temp", null);
+
+    ZipUtil.repack(src, dest, 1);
+
+    assertTrue(ZipUtil.archiveEquals(src, dest));
+  }
+
+
+  public void testContainsAnyEntry() throws IOException {
+    File src = new File(getClass().getResource("demo.zip").getPath());
+    boolean exists = ZipUtil.containsAnyEntry(src, new String[] { "foo.txt", "bar.txt" });
+    assertTrue(exists);
+
+    exists = ZipUtil.containsAnyEntry(src, new String[] { "foo.txt", "does-not-exist.txt" });
+    assertTrue(exists);
+
+    exists = ZipUtil.containsAnyEntry(src, new String[] { "does-not-exist-I.txt", "does-not-exist-II.txt" });
+    assertFalse(exists);
+  }
+
+  public void testAddEntry() throws IOException {
+    File src = new File(getClass().getResource("demo.zip").getPath());
+    final String fileName = "TestFile.txt";
+    assertFalse(ZipUtil.containsEntry(src, fileName));
+    File newEntry = new File(getClass().getResource(fileName).getPath());
+    File dest = File.createTempFile("temp.zip", null);
+
+    ZipUtil.addEntry(src, fileName, newEntry, dest);
+    assertTrue(ZipUtil.containsEntry(dest, fileName));
+  }
+
+  public void testRemoveEntry() throws IOException {
+    File src = new File(getClass().getResource("demo.zip").getPath());
+
+    File dest = File.createTempFile("temp", null);
+    try {
+      ZipUtil.removeEntry(src, "bar.txt", dest);
+      assertTrue("Result zip misses entry 'foo.txt'", ZipUtil.containsEntry(dest, "foo.txt"));
+      assertTrue("Result zip misses entry 'foo1.txt'", ZipUtil.containsEntry(dest, "foo1.txt"));
+      assertTrue("Result zip misses entry 'foo2.txt'", ZipUtil.containsEntry(dest, "foo2.txt"));
+      assertFalse("Result zip still contains 'bar.txt'", ZipUtil.containsEntry(dest, "bar.txt"));
+    }
+    finally {
+      FileUtils.deleteQuietly(dest);
+    }
+  }
+
+  public void testRemoveDirs() throws IOException {
+    File src = new File(getClass().getResource("demo-dirs.zip").getPath());
+
+    File dest = File.createTempFile("temp", null);
+    try {
+      ZipUtil.removeEntries(src, new String[] { "bar.txt", "a/b" }, dest);
+
+      assertFalse("Result zip still contains 'bar.txt'", ZipUtil.containsEntry(dest, "bar.txt"));
+      assertFalse("Result zip still contains dir 'a/b'", ZipUtil.containsEntry(dest, "a/b"));
+      assertTrue("Result doesn't containt 'attic'", ZipUtil.containsEntry(dest, "attic/treasure.txt"));
+      assertTrue("Entry whose prefix is dir name is removed too: 'b.txt'", ZipUtil.containsEntry(dest, "a/b.txt"));
+      assertFalse("Entry in a removed dir is still there: 'a/b/c.txt'", ZipUtil.containsEntry(dest, "a/b/c.txt"));
+
+    }
+    finally {
+      FileUtils.deleteQuietly(dest);
+    }
+  }
+}

+ 338 - 0
test_data/2.java

@@ -0,0 +1,338 @@
+/**
+ *    Copyright (C) 2012 ZeroTurnaround LLC <support@zeroturnaround.com>
+ *
+ *    Licensed under the Apache License, Version 2.0 (the "License");
+ *    you may not use this file except in compliance with the License.
+ *    You may obtain a copy of the License at
+ *
+ *        http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *    Unless required by applicable law or agreed to in writing, software
+ *    distributed under the License is distributed on an "AS IS" BASIS,
+ *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *    See the License for the specific language governing permissions and
+ *    limitations under the License.
+ */
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipOutputStream;
+
+import junit.framework.TestCase;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.IOUtils;
+import org.zeroturnaround.zip.ZipEntrySource;
+import org.zeroturnaround.zip.ZipException;
+import org.zeroturnaround.zip.ZipUtil;
+
+public class ZipUtilTest extends TestCase {
+
+  public void testUnpackEntryFromFile() throws IOException {
+    final String name = "foo";
+    final byte[] contents = "bar".getBytes();
+
+    File file = File.createTempFile("temp", null);
+    try {
+      // Create the ZIP file
+      ZipOutputStream zos = new ZipOutputStream(new FileOutputStream(file));
+      try {
+        zos.putNextEntry(new ZipEntry(name));
+        zos.write(contents);
+        zos.closeEntry();
+      }
+      finally {
+        IOUtils.closeQuietly(zos);
+      }
+
+      // Test the ZipUtil
+      byte[] actual = ZipUtil.unpackEntry(file, name);
+      assertNotNull(actual);
+      assertEquals(new String(contents), new String(actual));
+    }
+    // 1
+    
+    // 2
+    
+    // 3
+    finally {
+      FileUtils.deleteQuietly(file);
+    }
+  }
+  
+  public void testUnpackEntryFromStreamToFile() throws IOException {
+    final String name = "foo";
+    final byte[] contents = "bar".getBytes();
+
+    File file = File.createTempFile("temp", null);
+    try {
+      // Create the ZIP file
+      ZipOutputStream zos = new ZipOutputStream(new FileOutputStream(file));
+      try {
+        zos.putNextEntry(new ZipEntry(name));
+        zos.write(contents);
+        zos.closeEntry();
+      }
+      finally {
+        IOUtils.closeQuietly(zos);
+      }
+
+      FileInputStream fis = new FileInputStream(file);
+
+      File outputFile = File.createTempFile("temp-output", null);
+
+      boolean result = ZipUtil.unpackEntry(fis, name, outputFile);
+      assertTrue(result);
+      
+      BufferedInputStream bis = new BufferedInputStream(new FileInputStream(outputFile));
+      byte[] actual = new byte[1024];
+      int read = bis.read(actual);
+      bis.close();
+      
+      assertEquals(new String(contents), new String(actual, 0, read));
+    }
+    // 1
+    
+    // 2
+    
+    // 3
+    finally {
+      FileUtils.deleteQuietly(file);
+    }
+  }
+  
+  public void testUnpackEntryFromStream() throws IOException {
+    final String name = "foo";
+    final byte[] contents = "bar".getBytes();
+
+    File file = File.createTempFile("temp", null);
+    try {
+      // Create the ZIP file
+      ZipOutputStream zos = new ZipOutputStream(new FileOutputStream(file));
+      try {
+        zos.putNextEntry(new ZipEntry(name));
+        zos.write(contents);
+        zos.closeEntry();
+      }
+      finally {
+        IOUtils.closeQuietly(zos);
+      }
+
+      FileInputStream fis = new FileInputStream(file);
+      // Test the ZipUtil
+      byte[] actual = ZipUtil.unpackEntry(fis, name);
+      assertNotNull(actual);
+      assertEquals(new String(contents), new String(actual));
+    }
+    // 1
+    
+    // 2
+    
+    // 3
+    finally {
+      FileUtils.deleteQuietly(file);
+    }
+  }
+
+  public void testDuplicateEntryAtAdd() throws IOException {
+    File src = new File(getClass().getResource("duplicate.zip").getPath());
+
+    File dest = File.createTempFile("temp", null);
+    try {
+      ZipUtil.addEntries(src, new ZipEntrySource[0], dest);
+    }
+    finally {
+      FileUtils.deleteQuietly(dest);
+    }
+  }
+
+  public void testDuplicateEntryAtReplace() throws IOException {
+    File src = new File(getClass().getResource("duplicate.zip").getPath());
+
+    File dest = File.createTempFile("temp", null);
+    try {
+      ZipUtil.replaceEntries(src, new ZipEntrySource[0], dest);
+    }
+    finally {
+      FileUtils.deleteQuietly(dest);
+    }
+  }
+
+  public void testDuplicateEntryAtAddOrReplace() throws IOException {
+    File src = new File(getClass().getResource("duplicate.zip").getPath());
+
+    File dest = File.createTempFile("temp", null);
+    try {
+      ZipUtil.addOrReplaceEntries(src, new ZipEntrySource[0], dest);
+    }
+    finally {
+      FileUtils.deleteQuietly(dest);
+    }
+  }
+
+  public void testUnexplode() throws IOException {
+    File file = File.createTempFile("tempFile", null);
+    File tmpDir = file.getParentFile();
+
+    unexplodeWithException(file, "shouldn't be able to unexplode file that is not a directory");
+    assertTrue("Should be able to delete tmp file", file.delete());
+    unexplodeWithException(file, "shouldn't be able to unexplode file that doesn't exist");
+
+    // create empty tmp dir with the same name as deleted file
+    File dir = new File(tmpDir, file.getName());
+    dir.deleteOnExit();
+    assertTrue("Should be able to create directory with the same name as there was tmp file", dir.mkdir());
+
+    unexplodeWithException(dir, "shouldn't be able to unexplode dir that doesn't contain any files");
+
+    // unexplode should succeed with at least one file in directory
+    File.createTempFile("temp", null, dir);
+    ZipUtil.unexplode(dir);
+
+    assertTrue("zip file should exist with the same name as the directory that was unexploded", dir.exists());
+    assertTrue("unexploding input directory should have produced zip file with the same name", !dir.isDirectory());
+    assertTrue("Should be able to delete zip that was created from directory", dir.delete());
+  }
+
+  public void testPackEntry() throws Exception {
+    File fileToPack = new File(getClass().getResource("TestFile.txt").getPath());
+    File dest = File.createTempFile("temp", null);
+    ZipUtil.packEntry(fileToPack, dest);
+    assertTrue(dest.exists());
+
+    ZipUtil.explode(dest);
+    assertTrue((new File(dest, "TestFile.txt")).exists());
+    // if fails then maybe somebody changed the file contents and did not update
+    // the test
+    assertEquals(108, (new File(dest, "TestFile.txt")).length());
+  }
+
+  public void testPackEntries() throws Exception {
+    File fileToPack = new File(getClass().getResource("TestFile.txt").getPath());
+    File fileToPackII = new File(getClass().getResource("TestFile-II.txt").getPath());
+    File dest = File.createTempFile("temp", null);
+    ZipUtil.packEntries(new File[] { fileToPack, fileToPackII }, dest);
+    assertTrue(dest.exists());
+
+    ZipUtil.explode(dest);
+    assertTrue((new File(dest, "TestFile.txt")).exists());
+    assertTrue((new File(dest, "TestFile-II.txt")).exists());
+    // if fails then maybe somebody changed the file contents and did not update
+    // the test
+    assertEquals(108, (new File(dest, "TestFile.txt")).length());
+    assertEquals(103, (new File(dest, "TestFile-II.txt")).length());
+  }
+
+  public void testZipException() {
+    boolean exceptionThrown = false;
+    try {
+      ZipUtil.pack(new File("nonExistent"), new File("weeheha"));
+    }
+    catch (ZipException e) {
+      exceptionThrown = true;
+    }
+    assertTrue(exceptionThrown);
+  }
+
+  public void testPreserveRoot() throws Exception {
+    File dest = File.createTempFile("temp", null);
+    File parent = new File(getClass().getResource("TestFile.txt").getPath()).getParentFile();
+    ZipUtil.pack(parent, dest, true);
+    ZipUtil.explode(dest);
+    assertTrue((new File(dest, parent.getName())).exists());
+  }
+
+  private void unexplodeWithException(File file, String message) {
+    boolean ok = false;
+    try {
+      ZipUtil.unexplode(file);
+    }
+    catch (Exception e) {
+      ok = true;
+    }
+    assertTrue(message, ok);
+  }
+
+  public void testArchiveEquals() {
+    File src = new File(getClass().getResource("demo.zip").getPath());
+    // byte-by-byte copy
+    File src2 = new File(getClass().getResource("demo-copy.zip").getPath());
+    assertTrue(ZipUtil.archiveEquals(src, src2));
+    
+    // entry by entry copy
+    File src3 = new File(getClass().getResource("demo-copy-II.zip").getPath());
+    assertTrue(ZipUtil.archiveEquals(src, src3));
+  }
+  
+  public void testRepackArchive() throws IOException {
+    File src = new File(getClass().getResource("demo.zip").getPath());
+    File dest = File.createTempFile("temp", null);
+
+    ZipUtil.repack(src, dest, 1);
+
+    assertTrue(ZipUtil.archiveEquals(src, dest));
+  }
+
+
+  public void testContainsAnyEntry() throws IOException {
+    File src = new File(getClass().getResource("demo.zip").getPath());
+    boolean exists = ZipUtil.containsAnyEntry(src, new String[] { "foo.txt", "bar.txt" });
+    assertTrue(exists);
+
+    exists = ZipUtil.containsAnyEntry(src, new String[] { "foo.txt", "does-not-exist.txt" });
+    assertTrue(exists);
+
+    exists = ZipUtil.containsAnyEntry(src, new String[] { "does-not-exist-I.txt", "does-not-exist-II.txt" });
+    assertFalse(exists);
+  }
+
+  public void testAddEntry() throws IOException {
+    File src = new File(getClass().getResource("demo.zip").getPath());
+    final String fileName = "TestFile.txt";
+    assertFalse(ZipUtil.containsEntry(src, fileName));
+    File newEntry = new File(getClass().getResource(fileName).getPath());
+    File dest = File.createTempFile("temp.zip", null);
+
+    ZipUtil.addEntry(src, fileName, newEntry, dest);
+    assertTrue(ZipUtil.containsEntry(dest, fileName));
+  }
+
+  public void testRemoveEntry() throws IOException {
+    File src = new File(getClass().getResource("demo.zip").getPath());
+
+    File dest = File.createTempFile("temp", null);
+    try {
+      ZipUtil.removeEntry(src, "bar.txt", dest);
+      assertTrue("Result zip misses entry 'foo.txt'", ZipUtil.containsEntry(dest, "foo.txt"));
+      assertTrue("Result zip misses entry 'foo1.txt'", ZipUtil.containsEntry(dest, "foo1.txt"));
+      assertTrue("Result zip misses entry 'foo2.txt'", ZipUtil.containsEntry(dest, "foo2.txt"));
+      assertFalse("Result zip still contains 'bar.txt'", ZipUtil.containsEntry(dest, "bar.txt"));
+    }
+    finally {
+      FileUtils.deleteQuietly(dest);
+    }
+  }
+
+  public void testRemoveDirs() throws IOException {
+    File src = new File(getClass().getResource("demo-dirs.zip").getPath());
+
+    File dest = File.createTempFile("temp", null);
+    try {
+      ZipUtil.removeEntries(src, new String[] { "bar.txt", "a/b" }, dest);
+
+      assertFalse("Result zip still contains 'bar.txt'", ZipUtil.containsEntry(dest, "bar.txt"));
+      assertFalse("Result zip still contains dir 'a/b'", ZipUtil.containsEntry(dest, "a/b"));
+      assertTrue("Result doesn't containt 'attic'", ZipUtil.containsEntry(dest, "attic/treasure.txt"));
+      assertTrue("Entry whose prefix is dir name is removed too: 'b.txt'", ZipUtil.containsEntry(dest, "a/b.txt"));
+      assertFalse("Entry in a removed dir is still there: 'a/b/c.txt'", ZipUtil.containsEntry(dest, "a/b/c.txt"));
+
+    }
+    finally {
+      FileUtils.deleteQuietly(dest);
+    }
+  }
+}

+ 3938 - 0
test_data/uast1.pb

File diff is too large to display

+ 4445 - 0
test_data/uast2.pb

File diff is too large to display


+ 6 - 2
tree_diff.go

@@ -11,12 +11,16 @@ type TreeDiff struct {
 	previousTree *object.Tree
 }
 
+const (
+	DependencyTreeChanges = "changes"
+)
+
 func (treediff *TreeDiff) Name() string {
 	return "TreeDiff"
 }
 
 func (treediff *TreeDiff) Provides() []string {
-	arr := [...]string{"changes"}
+	arr := [...]string{DependencyTreeChanges}
 	return arr[:]
 }
 
@@ -70,7 +74,7 @@ func (treediff *TreeDiff) Consume(deps map[string]interface{}) (map[string]inter
 		}
 	}
 	treediff.previousTree = tree
-	return map[string]interface{}{"changes": diff}, nil
+	return map[string]interface{}{DependencyTreeChanges: diff}, nil
 }
 
 func init() {
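
For orientation, downstream items now look the tree diff up under the exported DependencyTreeChanges key instead of the bare "changes" string. Below is a minimal sketch of such a consumer written in the style used throughout this change; the FileCounter item and the gopkg.in/src-d/go-git.v4 import path are assumptions for illustration and are not part of this pull request, and only the methods relevant to the dependency lookup are shown.

package hercules

import (
	"gopkg.in/src-d/go-git.v4/plumbing/object"
)

// FileCounter is a hypothetical pipeline item, shown only to illustrate how
// DependencyTreeChanges is declared and consumed after this refactoring.
type FileCounter struct {
	total int
}

func (fc *FileCounter) Requires() []string {
	arr := [...]string{DependencyTreeChanges}
	return arr[:]
}

func (fc *FileCounter) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
	// The value stored under DependencyTreeChanges is an object.Changes slice.
	changes := deps[DependencyTreeChanges].(object.Changes)
	fc.total += len(changes)
	return nil, nil
}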

+ 3 - 3
tree_diff_test.go

@@ -21,7 +21,7 @@ func TestTreeDiffMeta(t *testing.T) {
 	assert.Equal(t, td.Name(), "TreeDiff")
 	assert.Equal(t, len(td.Requires()), 0)
 	assert.Equal(t, len(td.Provides()), 1)
-	assert.Equal(t, td.Provides()[0], "changes")
+	assert.Equal(t, td.Provides()[0], DependencyTreeChanges)
 	opts := td.ListConfigurationOptions()
 	assert.Len(t, opts, 0)
 }
@@ -52,7 +52,7 @@ func TestTreeDiffConsume(t *testing.T) {
 	res, err := td.Consume(deps)
 	assert.Nil(t, err)
 	assert.Equal(t, len(res), 1)
-	changes := res["changes"].(object.Changes)
+	changes := res[DependencyTreeChanges].(object.Changes)
 	assert.Equal(t, len(changes), 12)
 	baseline := map[string]merkletrie.Action{
 		"analyser.go":               merkletrie.Delete,
@@ -90,7 +90,7 @@ func TestTreeDiffConsumeFirst(t *testing.T) {
 	res, err := td.Consume(deps)
 	assert.Nil(t, err)
 	assert.Equal(t, len(res), 1)
-	changes := res["changes"].(object.Changes)
+	changes := res[DependencyTreeChanges].(object.Changes)
 	assert.Equal(t, len(changes), 21)
 	for _, change := range changes {
 		action, err := change.Action()

+ 22 - 15
uast.go

@@ -48,6 +48,9 @@ const (
 	ConfigUASTPoolSize     = "ConfigUASTPoolSize"
 	ConfigUASTFailOnErrors = "ConfigUASTFailOnErrors"
 	ConfigUASTLanguages    = "ConfigUASTLanguages"
+
+	FeatureUast     = "uast"
+	DependencyUasts = "uasts"
 )
 
 type uastTask struct {
@@ -79,17 +82,17 @@ func (exr *UASTExtractor) Name() string {
 }
 
 func (exr *UASTExtractor) Provides() []string {
-	arr := [...]string{"uasts"}
+	arr := [...]string{DependencyUasts}
 	return arr[:]
 }
 
 func (exr *UASTExtractor) Requires() []string {
-	arr := [...]string{"changes", "blob_cache"}
+	arr := [...]string{DependencyTreeChanges, DependencyBlobCache}
 	return arr[:]
 }
 
 func (exr *UASTExtractor) Features() []string {
-	arr := [...]string{"uast"}
+	arr := [...]string{FeatureUast}
 	return arr[:]
 }
 
@@ -185,8 +188,8 @@ func (exr *UASTExtractor) Initialize(repository *git.Repository) {
 }
 
 func (exr *UASTExtractor) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
-	cache := deps["blob_cache"].(map[plumbing.Hash]*object.Blob)
-	treeDiffs := deps["changes"].(object.Changes)
+	cache := deps[DependencyBlobCache].(map[plumbing.Hash]*object.Blob)
+	treeDiffs := deps[DependencyTreeChanges].(object.Changes)
 	uasts := map[plumbing.Hash]*uast.Node{}
 	lock := sync.RWMutex{}
 	errs := make([]error, 0)
@@ -249,7 +252,7 @@ func (exr *UASTExtractor) Consume(deps map[string]interface{}) (map[string]inter
 			fmt.Fprintln(os.Stderr, joined)
 		}
 	}
-	return map[string]interface{}{"uasts": uasts}, nil
+	return map[string]interface{}{DependencyUasts: uasts}, nil
 }
 
 func (exr *UASTExtractor) extractUAST(
@@ -303,6 +306,10 @@ type UASTChange struct {
 	Change *object.Change
 }
 
+const (
+	DependencyUastChanges = "changed_uasts"
+)
+
 type UASTChanges struct {
 	cache map[plumbing.Hash]*uast.Node
 }
@@ -312,17 +319,17 @@ func (uc *UASTChanges) Name() string {
 }
 
 func (uc *UASTChanges) Provides() []string {
-	arr := [...]string{"changed_uasts"}
+	arr := [...]string{DependencyUastChanges}
 	return arr[:]
 }
 
 func (uc *UASTChanges) Requires() []string {
-	arr := [...]string{"uasts", "changes"}
+	arr := [...]string{DependencyUasts, DependencyTreeChanges}
 	return arr[:]
 }
 
 func (uc *UASTChanges) Features() []string {
-	arr := [...]string{"uast"}
+	arr := [...]string{FeatureUast}
 	return arr[:]
 }
 
@@ -337,8 +344,8 @@ func (uc *UASTChanges) Initialize(repository *git.Repository) {
 }
 
 func (uc *UASTChanges) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
-	uasts := deps["uasts"].(map[plumbing.Hash]*uast.Node)
-	treeDiffs := deps["changes"].(object.Changes)
+	uasts := deps[DependencyUasts].(map[plumbing.Hash]*uast.Node)
+	treeDiffs := deps[DependencyTreeChanges].(object.Changes)
 	commit := make([]UASTChange, 0, len(treeDiffs))
 	for _, change := range treeDiffs {
 		action, err := change.Action()
@@ -364,7 +371,7 @@ func (uc *UASTChanges) Consume(deps map[string]interface{}) (map[string]interfac
 			uc.cache[hashTo] = uastTo
 		}
 	}
-	return map[string]interface{}{"changed_uasts": commit}, nil
+	return map[string]interface{}{DependencyUastChanges: commit}, nil
 }
 
 type UASTChangesSaver struct {
@@ -388,12 +395,12 @@ func (saver *UASTChangesSaver) Provides() []string {
 }
 
 func (saver *UASTChangesSaver) Requires() []string {
-	arr := [...]string{"changed_uasts"}
+	arr := [...]string{DependencyUastChanges}
 	return arr[:]
 }
 
 func (saver *UASTChangesSaver) Features() []string {
-	arr := [...]string{"uast"}
+	arr := [...]string{FeatureUast}
 	return arr[:]
 }
 
@@ -424,7 +431,7 @@ func (saver *UASTChangesSaver) Initialize(repository *git.Repository) {
 }
 
 func (saver *UASTChangesSaver) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
-	changes := deps["changed_uasts"].([]UASTChange)
+	changes := deps[DependencyUastChanges].([]UASTChange)
 	saver.result = append(saver.result, changes)
 	return nil, nil
 }
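
The same pattern applies to UAST consumers: an item that processes UAST changes declares DependencyUastChanges in Requires() and FeatureUast in Features(), then reads the []UASTChange slice from deps. A minimal sketch follows, assuming a hypothetical NodeCounter item that is not part of this pull request; only the methods relevant to the new constants are shown.

package hercules

// NodeCounter is a hypothetical pipeline item that demonstrates how
// DependencyUastChanges and FeatureUast are meant to be used by consumers.
type NodeCounter struct {
	nodes int
}

func (nc *NodeCounter) Requires() []string {
	arr := [...]string{DependencyUastChanges}
	return arr[:]
}

func (nc *NodeCounter) Features() []string {
	arr := [...]string{FeatureUast}
	return arr[:]
}

func (nc *NodeCounter) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
	// Each UASTChange pairs the tree change with the UASTs before and after it.
	changes := deps[DependencyUastChanges].([]UASTChange)
	for _, change := range changes {
		if change.After != nil {
			nc.nodes += len(change.After.Children)
		}
	}
	return nil, nil
}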

+ 19 - 19
uast_test.go

@@ -27,10 +27,10 @@ func TestUASTExtractorMeta(t *testing.T) {
 	exr := fixtureUASTExtractor()
 	assert.Equal(t, exr.Name(), "UAST")
 	assert.Equal(t, len(exr.Provides()), 1)
-	assert.Equal(t, exr.Provides()[0], "uasts")
+	assert.Equal(t, exr.Provides()[0], DependencyUasts)
 	assert.Equal(t, len(exr.Requires()), 2)
-	assert.Equal(t, exr.Requires()[0], "changes")
-	assert.Equal(t, exr.Requires()[1], "blob_cache")
+	assert.Equal(t, exr.Requires()[0], DependencyTreeChanges)
+	assert.Equal(t, exr.Requires()[1], DependencyBlobCache)
 	opts := exr.ListConfigurationOptions()
 	assert.Len(t, opts, 5)
 	assert.Equal(t, opts[0].Name, ConfigUASTEndpoint)
@@ -40,7 +40,7 @@ func TestUASTExtractorMeta(t *testing.T) {
 	assert.Equal(t, opts[4].Name, ConfigUASTLanguages)
 	feats := exr.Features()
 	assert.Len(t, feats, 1)
-	assert.Equal(t, feats[0], "uast")
+	assert.Equal(t, feats[0], FeatureUast)
 }
 
 func TestUASTExtractorConfiguration(t *testing.T) {
@@ -118,16 +118,16 @@ func TestUASTExtractorConsume(t *testing.T) {
 	hash = plumbing.NewHash("f7d918ec500e2f925ecde79b51cc007bac27de72")
 	cache[hash], _ = testRepository.BlobObject(hash)
 	deps := map[string]interface{}{}
-	deps["blob_cache"] = cache
-	deps["changes"] = changes
+	deps[DependencyBlobCache] = cache
+	deps[DependencyTreeChanges] = changes
 	res, err := exr.Consume(deps)
 	// Language not enabled
-	assert.Len(t, res["uasts"], 0)
+	assert.Len(t, res[DependencyUasts], 0)
 	assert.Nil(t, err)
 	exr.Languages["Go"] = true
 	res, err = exr.Consume(deps)
 	// No Go driver
-	assert.Len(t, res["uasts"], 0)
+	assert.Len(t, res[DependencyUasts], 0)
 	assert.Nil(t, err)
 
 	hash = plumbing.NewHash("5d78f57d732aed825764347ec6f3ab74d50d0619")
@@ -144,7 +144,7 @@ func TestUASTExtractorConsume(t *testing.T) {
 
 	res, err = exr.Consume(deps)
 	assert.Nil(t, err)
-	uasts := res["uasts"].(map[plumbing.Hash]*uast.Node)
+	uasts := res[DependencyUasts].(map[plumbing.Hash]*uast.Node)
 	assert.Equal(t, len(uasts), 1)
 	assert.Equal(t, len(uasts[hash].Children), 24)
 }
@@ -160,15 +160,15 @@ func TestUASTChangesMeta(t *testing.T) {
 	ch := fixtureUASTChanges()
 	assert.Equal(t, ch.Name(), "UASTChanges")
 	assert.Equal(t, len(ch.Provides()), 1)
-	assert.Equal(t, ch.Provides()[0], "changed_uasts")
+	assert.Equal(t, ch.Provides()[0], DependencyUastChanges)
 	assert.Equal(t, len(ch.Requires()), 2)
-	assert.Equal(t, ch.Requires()[0], "uasts")
-	assert.Equal(t, ch.Requires()[1], "changes")
+	assert.Equal(t, ch.Requires()[0], DependencyUasts)
+	assert.Equal(t, ch.Requires()[1], DependencyTreeChanges)
 	opts := ch.ListConfigurationOptions()
 	assert.Len(t, opts, 0)
 	feats := ch.Features()
 	assert.Len(t, feats, 1)
-	assert.Equal(t, feats[0], "uast")
+	assert.Equal(t, feats[0], FeatureUast)
 }
 
 func TestUASTChangesRegistration(t *testing.T) {
@@ -247,14 +247,14 @@ func TestUASTChangesConsume(t *testing.T) {
 	},
 	}
 	deps := map[string]interface{}{}
-	deps["uasts"] = uasts
-	deps["changes"] = changes
+	deps[DependencyUasts] = uasts
+	deps[DependencyTreeChanges] = changes
 	ch := fixtureUASTChanges()
 	ch.cache[changes[0].From.TreeEntry.Hash] = uastsArray[3]
 	ch.cache[changes[2].From.TreeEntry.Hash] = uastsArray[0]
 	resultMap, err := ch.Consume(deps)
 	assert.Nil(t, err)
-	result := resultMap["changed_uasts"].([]UASTChange)
+	result := resultMap[DependencyUastChanges].([]UASTChange)
 	assert.Len(t, result, 3)
 	assert.Equal(t, result[0].Change, changes[0])
 	assert.Equal(t, result[0].Before, uastsArray[3])
@@ -278,13 +278,13 @@ func TestUASTChangesSaverMeta(t *testing.T) {
 	assert.Equal(t, chs.Name(), "UASTChangesSaver")
 	assert.Equal(t, len(chs.Provides()), 0)
 	assert.Equal(t, len(chs.Requires()), 1)
-	assert.Equal(t, chs.Requires()[0], "changed_uasts")
+	assert.Equal(t, chs.Requires()[0], DependencyUastChanges)
 	opts := chs.ListConfigurationOptions()
 	assert.Len(t, opts, 1)
 	assert.Equal(t, opts[0].Name, ConfigUASTChangesSaverOutputPath)
 	feats := chs.Features()
 	assert.Len(t, feats, 1)
-	assert.Equal(t, feats[0], "uast")
+	assert.Equal(t, feats[0], FeatureUast)
 	assert.Equal(t, chs.Flag(), "dump-uast-changes")
 }
 
@@ -311,7 +311,7 @@ func TestUASTChangesSaverPayload(t *testing.T) {
 	chs := fixtureUASTChangesSaver()
 	deps := map[string]interface{}{}
 	changes := make([]UASTChange, 1)
-	deps["changed_uasts"] = changes
+	deps[DependencyUastChanges] = changes
 	treeFrom, _ := testRepository.TreeObject(plumbing.NewHash(
 		"a1eb2ea76eb7f9bfbde9b243861474421000eb96"))
 	treeTo, _ := testRepository.TreeObject(plumbing.NewHash(