瀏覽代碼

Add automatic graph build

Vadim Markovtsev 7 年之前
父節點
當前提交
1f6d0de955
共有 20 個文件被更改,包括 608 次插入81 次删除
  1. 10 0
      blob_cache.go
  2. 24 2
      burndown.go
  3. 3 3
      burndown_test.go
  4. 6 6
      cmd/hercules/main.go
  5. 12 2
      couples.go
  6. 5 5
      couples_test.go
  7. 6 0
      day.go
  8. 9 4
      diff.go
  9. 52 0
      diff_refiner.go
  10. 3 3
      diff_test.go
  11. 31 3
      identity.go
  12. 12 12
      identity_test.go
  13. 164 13
      pipeline.go
  14. 49 3
      pipeline_test.go
  15. 12 2
      renames.go
  16. 3 3
      renames_test.go
  17. 166 13
      toposort/toposort.go
  18. 6 0
      tree_diff.go
  19. 34 6
      uast.go
  20. 1 1
      uast_test.go

+ 10 - 0
blob_cache.go

@@ -32,6 +32,12 @@ func (cache *BlobCache) Requires() []string {
 	return arr[:]
 }
 
+// Construct configures the cache from facts. The only key consulted is
+// "BlobCache.IgnoreMissingSubmodules"; it is applied when it is present and
+// holds a bool, otherwise the current setting is kept.
+func (cache *BlobCache) Construct(facts map[string]interface{}) {
+	ignore, ok := facts["BlobCache.IgnoreMissingSubmodules"].(bool)
+	if !ok {
+		return
+	}
+	cache.IgnoreMissingSubmodules = ignore
+}
+
 func (cache *BlobCache) Initialize(repository *git.Repository) {
 	cache.repository = repository
 	cache.cache = map[plumbing.Hash]*object.Blob{}
@@ -139,3 +145,7 @@ func (cache *BlobCache) getBlob(entry *object.ChangeEntry, fileGetter FileGetter
 	}
 	return blob, nil
 }
+
+// init makes BlobCache known to the global Registry under the keys listed in
+// its Provides().
+func init() {
+	Registry.Register(&BlobCache{})
+}

+ 24 - 2
burndown.go

@@ -78,10 +78,28 @@ func (analyser *BurndownAnalysis) Provides() []string {
 }
 
 func (analyser *BurndownAnalysis) Requires() []string {
-	arr := [...]string{"file_diff", "renamed_changes", "blob_cache", "day", "author"}
+	arr := [...]string{"file_diff", "changes", "blob_cache", "day", "author"}
 	return arr[:]
 }
 
+// Construct configures the analysis from facts. Recognized keys:
+// "Burndown.Granularity" (int), "Burndown.Sampling" (int),
+// "Burndown.TrackFiles" (bool), "Burndown.Debug" (bool) and the number of
+// developers, taken from "PeopleNumber" (int) or, failing that, from
+// "PeopleCount" (int). A missing key or a value of another type leaves the
+// corresponding field untouched.
+func (analyser *BurndownAnalysis) Construct(facts map[string]interface{}) {
+	if val, exists := facts["Burndown.Granularity"].(int); exists {
+		analyser.Granularity = val
+	}
+	if val, exists := facts["Burndown.Sampling"].(int); exists {
+		analyser.Sampling = val
+	}
+	if val, exists := facts["Burndown.TrackFiles"].(bool); exists {
+		analyser.TrackFiles = val
+	}
+	// IdentityDetector.Construct publishes the developer count as "PeopleCount";
+	// "PeopleNumber" is still honoured first so that existing callers keep
+	// working. The fallback only helps if the fact is already set when this
+	// method runs.
+	if val, exists := facts["PeopleNumber"].(int); exists {
+		analyser.PeopleNumber = val
+	} else if val, exists := facts["PeopleCount"].(int); exists {
+		analyser.PeopleNumber = val
+	}
+	if val, exists := facts["Burndown.Debug"].(bool); exists {
+		analyser.Debug = val
+	}
+}
+
 func (analyser *BurndownAnalysis) Initialize(repository *git.Repository) {
 	analyser.repository = repository
 	analyser.globalStatus = map[int]int64{}
@@ -109,7 +127,7 @@ func (analyser *BurndownAnalysis) Consume(deps map[string]interface{}) (map[stri
 		analyser.updateHistories(gs, fss, pss, delta)
 	}
 	cache := deps["blob_cache"].(map[plumbing.Hash]*object.Blob)
-	treeDiffs := deps["renamed_changes"].(object.Changes)
+	treeDiffs := deps["changes"].(object.Changes)
 	fileDiffs := deps["file_diff"].(map[string]FileDiffData)
 	for _, change := range treeDiffs {
 		action, err := change.Action()
@@ -531,3 +549,7 @@ func (analyser *BurndownAnalysis) updateHistories(
 		analyser.peopleHistories[key] = ph
 	}
 }
+
+// init makes BurndownAnalysis known to the global Registry under the keys
+// listed in its Provides().
+func init() {
+	Registry.Register(&BurndownAnalysis{})
+}

+ 3 - 3
burndown_test.go

@@ -13,7 +13,7 @@ func TestBurndownMeta(t *testing.T) {
 	burndown := BurndownAnalysis{}
 	assert.Equal(t, burndown.Name(), "Burndown")
 	assert.Equal(t, len(burndown.Provides()), 0)
-	required := [...]string{"file_diff", "renamed_changes", "blob_cache", "day", "author"}
+	required := [...]string{"file_diff", "changes", "blob_cache", "day", "author"}
 	for _, name := range required {
 		assert.Contains(t, burndown.Requires(), name)
 	}
@@ -84,7 +84,7 @@ func TestBurndownConsumeFinalize(t *testing.T) {
 		},
 	},
 	}
-	deps["renamed_changes"] = changes
+	deps["changes"] = changes
 	fd := fixtureFileDiff()
 	result, err := fd.Consume(deps)
 	assert.Nil(t, err)
@@ -181,7 +181,7 @@ func TestBurndownConsumeFinalize(t *testing.T) {
 		},
 	}, To: object.ChangeEntry{},
 	}
-	deps["renamed_changes"] = changes
+	deps["changes"] = changes
 	fd = fixtureFileDiff()
 	result, err = fd.Consume(deps)
 	assert.Nil(t, err)

+ 6 - 6
cmd/hercules/main.go

@@ -216,10 +216,10 @@ func main() {
 	if withPeople || withCouples {
 		if people_dict_path != "" {
 			idMatcher.LoadPeopleDict(people_dict_path)
-			peopleCount = len(idMatcher.ReversePeopleDict) - 1
+			peopleCount = len(idMatcher.ReversedPeopleDict) - 1
 		} else {
 			idMatcher.GeneratePeopleDict(commits)
-			peopleCount = len(idMatcher.ReversePeopleDict)
+			peopleCount = len(idMatcher.ReversedPeopleDict)
 		}
 	}
 	pipeline.AddItem(idMatcher)
@@ -236,8 +236,8 @@ func main() {
 		coupler = &hercules.Couples{PeopleNumber: peopleCount}
 		pipeline.AddItem(coupler)
 	}
-
-	pipeline.Initialize()
+	facts := map[string]interface{}{}
+	pipeline.Initialize(facts)
 	result, err := pipeline.Run(commits)
 	if err != nil {
 		panic(err)
@@ -280,11 +280,11 @@ func main() {
 	if !protobuf {
 		printResults(uri, begin, end, granularity, sampling,
 			withFiles, withPeople, withCouples,
-			burndownResults, couplesResult, idMatcher.ReversePeopleDict)
+			burndownResults, couplesResult, idMatcher.ReversedPeopleDict)
 	} else {
 		serializeResults(uri, begin, end, granularity, sampling,
 			withFiles, withPeople, withCouples,
-			burndownResults, couplesResult, idMatcher.ReversePeopleDict)
+			burndownResults, couplesResult, idMatcher.ReversedPeopleDict)
 	}
 }
 

+ 12 - 2
couples.go

@@ -36,10 +36,16 @@ func (couples *Couples) Provides() []string {
 }
 
 func (couples *Couples) Requires() []string {
-	arr := [...]string{"author", "renamed_changes"}
+	arr := [...]string{"author", "changes"}
 	return arr[:]
 }
 
+// Construct configures the analysis from facts. The number of developers is
+// taken from "PeopleNumber" (int) or, failing that, from "PeopleCount" (int),
+// which is the key IdentityDetector.Construct publishes. When neither is
+// present with an int value, PeopleNumber is left untouched.
+func (couples *Couples) Construct(facts map[string]interface{}) {
+	if val, exists := facts["PeopleNumber"].(int); exists {
+		couples.PeopleNumber = val
+		return
+	}
+	// The fallback only helps if the fact is already set when this method runs.
+	if val, exists := facts["PeopleCount"].(int); exists {
+		couples.PeopleNumber = val
+	}
+}
+
 func (couples *Couples) Initialize(repository *git.Repository) {
 	couples.people = make([]map[string]int, couples.PeopleNumber+1)
 	for i := range couples.people {
@@ -55,7 +61,7 @@ func (couples *Couples) Consume(deps map[string]interface{}) (map[string]interfa
 		author = couples.PeopleNumber
 	}
 	couples.people_commits[author] += 1
-	tree_diff := deps["renamed_changes"].(object.Changes)
+	tree_diff := deps["changes"].(object.Changes)
 	context := make([]string, 0)
 	deleteFile := func(name string) {
 		// we do not remove the file from people - the context does not expire
@@ -161,3 +167,7 @@ func (couples *Couples) Finalize() interface{} {
 		PeopleMatrix: peopleMatrix, PeopleFiles: peopleFiles,
 		Files: filesSequence, FilesMatrix: filesMatrix}
 }
+
+// init makes Couples known to the global Registry under the keys listed in
+// its Provides().
+func init() {
+	Registry.Register(&Couples{})
+}

+ 5 - 5
couples_test.go

@@ -20,7 +20,7 @@ func TestCouplesMeta(t *testing.T) {
 	assert.Equal(t, len(c.Provides()), 0)
 	assert.Equal(t, len(c.Requires()), 2)
 	assert.Equal(t, c.Requires()[0], "author")
-	assert.Equal(t, c.Requires()[1], "renamed_changes")
+	assert.Equal(t, c.Requires()[1], "changes")
 }
 
 func generateChanges(names ...string) object.Changes {
@@ -63,15 +63,15 @@ func TestCouplesConsumeFinalize(t *testing.T) {
 	c := fixtureCouples()
 	deps := map[string]interface{}{}
 	deps["author"] = 0
-	deps["renamed_changes"] = generateChanges("+two", "+four", "+six")
+	deps["changes"] = generateChanges("+two", "+four", "+six")
 	c.Consume(deps)
-	deps["renamed_changes"] = generateChanges("+one", "-two", "=three", ">four>five")
+	deps["changes"] = generateChanges("+one", "-two", "=three", ">four>five")
 	c.Consume(deps)
 	deps["author"] = 1
-	deps["renamed_changes"] = generateChanges("=one", "=three", "-six")
+	deps["changes"] = generateChanges("=one", "=three", "-six")
 	c.Consume(deps)
 	deps["author"] = 2
-	deps["renamed_changes"] = generateChanges("=five")
+	deps["changes"] = generateChanges("=five")
 	c.Consume(deps)
 	assert.Equal(t, len(c.people[0]), 5)
 	assert.Equal(t, c.people[0]["one"], 1)

+ 6 - 0
day.go

@@ -25,6 +25,8 @@ func (days *DaysSinceStart) Requires() []string {
 	return []string{}
 }
 
+// Construct does nothing: DaysSinceStart reads no parameters from facts.
+func (days *DaysSinceStart) Construct(facts map[string]interface{}) {
+}
+
 func (days *DaysSinceStart) Initialize(repository *git.Repository) {
 	days.day0 = time.Time{}
 	days.previousDay = 0
@@ -49,3 +51,7 @@ func (days *DaysSinceStart) Consume(deps map[string]interface{}) (map[string]int
 func (days *DaysSinceStart) Finalize() interface{} {
 	return nil
 }
+
+// init makes DaysSinceStart known to the global Registry under the keys
+// listed in its Provides().
+func init() {
+	Registry.Register(&DaysSinceStart{})
+}

+ 9 - 4
diff.go

@@ -31,17 +31,18 @@ func (diff *FileDiff) Provides() []string {
 }
 
 func (diff *FileDiff) Requires() []string {
-	arr := [...]string{"renamed_changes", "blob_cache"}
+	arr := [...]string{"changes", "blob_cache"}
 	return arr[:]
 }
 
-func (diff *FileDiff) Initialize(repository *git.Repository) {
-}
+// Construct does nothing: FileDiff reads no parameters from facts.
+func (diff *FileDiff) Construct(facts map[string]interface{}) {
+}
+
+// Initialize does nothing: FileDiff keeps no state that depends on the repository.
+func (diff *FileDiff) Initialize(repository *git.Repository) {
+}
 
 func (diff *FileDiff) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
 	result := map[string]FileDiffData{}
 	cache := deps["blob_cache"].(map[plumbing.Hash]*object.Blob)
-	tree_diff := deps["renamed_changes"].(object.Changes)
+	tree_diff := deps["changes"].(object.Changes)
 	for _, change := range tree_diff {
 		action, err := change.Action()
 		if err != nil {
@@ -93,3 +94,7 @@ func blobToString(file *object.Blob) (string, error) {
 	buf.ReadFrom(reader)
 	return buf.String(), nil
 }
+
+// init makes FileDiff known to the global Registry under the keys listed in
+// its Provides().
+func init() {
+	Registry.Register(&FileDiff{})
+}

+ 52 - 0
diff_refiner.go

@@ -0,0 +1,52 @@
+package hercules
+
+import (
+	"gopkg.in/src-d/go-git.v4"
+)
+
+type FileDiffRefiner struct {
+}
+
+func (ref *FileDiffRefiner) Name() string {
+	return "FileDiffRefiner"
+}
+
+// Provides returns the keys of the entities produced by Consume(): "file_diff".
+func (ref *FileDiffRefiner) Provides() []string {
+	return []string{"file_diff"}
+}
+
+// Requires returns the keys which must be present in the deps passed to
+// Consume(): "file_diff" and "changed_uasts".
+func (ref *FileDiffRefiner) Requires() []string {
+	return []string{"file_diff", "changed_uasts"}
+}
+
+// Construct does nothing: FileDiffRefiner reads no parameters from facts.
+func (ref *FileDiffRefiner) Construct(facts map[string]interface{}) {
+}
+
+// Initialize does nothing: FileDiffRefiner has no state to prepare or reset.
+func (ref *FileDiffRefiner) Initialize(repository *git.Repository) {}
+
+// Consume takes the per-file diffs from deps["file_diff"] and the UAST changes
+// from deps["changed_uasts"] and returns the "file_diff" entity for the
+// downstream items.
+//
+// The actual refinement is not implemented yet (see the TODO below), so every
+// incoming diff is forwarded unchanged. Returning an empty map instead would
+// silently drop all the diffs for every item which requires "file_diff".
+//
+// Both deps are type-asserted without a check; a missing or mistyped entry
+// panics.
+func (ref *FileDiffRefiner) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
+	changesList := deps["changed_uasts"].([]UASTChange)
+	// Index the UAST changes which have both sides by the new file name.
+	changes := map[string]UASTChange{}
+	for _, change := range changesList {
+		if change.Before != nil && change.After != nil {
+			changes[change.Change.To.Name] = change
+		}
+	}
+	diffs := deps["file_diff"].(map[string]FileDiffData)
+	result := make(map[string]FileDiffData, len(diffs))
+	for fileName, diff := range diffs {
+		_ /*change*/ = changes[fileName]
+		// TODO: scan diff line by line
+		result[fileName] = diff
+	}
+	return map[string]interface{}{"file_diff": result}, nil
+}
+
+func (ref *FileDiffRefiner) Finalize() interface{} {
+	return nil
+}
+
+// init makes FileDiffRefiner known to the global Registry under the keys
+// listed in its Provides().
+func init() {
+	Registry.Register(&FileDiffRefiner{})
+}

+ 3 - 3
diff_test.go

@@ -22,7 +22,7 @@ func TestFileDiffMeta(t *testing.T) {
 	assert.Equal(t, len(fd.Provides()), 1)
 	assert.Equal(t, fd.Provides()[0], "file_diff")
 	assert.Equal(t, len(fd.Requires()), 2)
-	assert.Equal(t, fd.Requires()[0], "renamed_changes")
+	assert.Equal(t, fd.Requires()[0], "changes")
 	assert.Equal(t, fd.Requires()[1], "blob_cache")
 }
 
@@ -85,7 +85,7 @@ func TestFileDiffConsume(t *testing.T) {
 		},
 	}, To: object.ChangeEntry{},
 	}
-	deps["renamed_changes"] = changes
+	deps["changes"] = changes
 	res, err := fd.Consume(deps)
 	assert.Nil(t, err)
 	diffs := res["file_diff"].(map[string]FileDiffData)
@@ -142,7 +142,7 @@ func TestFileDiffConsumeInvalidBlob(t *testing.T) {
 			Hash: plumbing.NewHash("334cde09da4afcb74f8d2b3e6fd6cce61228b485"),
 		},
 	}}
-	deps["renamed_changes"] = changes
+	deps["changes"] = changes
 	res, err := fd.Consume(deps)
 	assert.Nil(t, res)
 	assert.NotNil(t, err)

+ 31 - 3
identity.go

@@ -14,7 +14,7 @@ type IdentityDetector struct {
 	// Maps email || name  -> developer id.
 	PeopleDict map[string]int
 	// Maps developer id -> description
-	ReversePeopleDict []string
+	ReversedPeopleDict []string
 }
 
 const MISSING_AUTHOR = (1 << 18) - 1
@@ -33,6 +33,30 @@ func (id *IdentityDetector) Requires() []string {
 	return []string{}
 }
 
+// Construct fills PeopleDict and ReversedPeopleDict and publishes the number
+// of developers as facts["PeopleCount"].
+//
+// The dictionaries are taken, in order of preference, from:
+//  1. facts["IdentityDetector.PeopleDict"] and
+//     facts["IdentityDetector.ReversedPeopleDict"] (or the values already set
+//     on the struct);
+//  2. the file named by facts["IdentityDetector.PeopleDictPath"], if non-empty;
+//  3. the commits in facts["commits"].
+//
+// It panics if ReversedPeopleDict is set while PeopleDict is not, if the
+// dictionary file cannot be loaded, or if the dictionary has to be generated
+// but facts["commits"] is absent or of the wrong type.
+func (id *IdentityDetector) Construct(facts map[string]interface{}) {
+	if val, exists := facts["IdentityDetector.PeopleDict"].(map[string]int); exists {
+		id.PeopleDict = val
+	}
+	if val, exists := facts["IdentityDetector.ReversedPeopleDict"].([]string); exists {
+		id.ReversedPeopleDict = val
+	}
+	if id.PeopleDict != nil {
+		facts["PeopleCount"] = len(id.ReversedPeopleDict)
+		return
+	}
+	if id.ReversedPeopleDict != nil {
+		panic("IdentityDetector: ReversedPeopleDict != nil while PeopleDict == nil")
+	}
+	// The two-value assertion treats an absent path the same as an empty one
+	// instead of panicking with an opaque interface conversion error.
+	if peopleDictPath, _ := facts["IdentityDetector.PeopleDictPath"].(string); peopleDictPath != "" {
+		if err := id.LoadPeopleDict(peopleDictPath); err != nil {
+			// Construct cannot return an error; continuing would leave the
+			// dictionaries nil and publish a count of -1.
+			panic(err)
+		}
+		// LoadPeopleDict appends the "<unmatched>" entry, which is not a developer.
+		facts["PeopleCount"] = len(id.ReversedPeopleDict) - 1
+		return
+	}
+	commits, exists := facts["commits"].([]*object.Commit)
+	if !exists {
+		panic("IdentityDetector: neither a people dictionary nor the \"commits\" fact was supplied")
+	}
+	id.GeneratePeopleDict(commits)
+	facts["PeopleCount"] = len(id.ReversedPeopleDict)
+}
+
 func (id *IdentityDetector) Initialize(repository *git.Repository) {
 }
 
@@ -73,7 +97,7 @@ func (id *IdentityDetector) LoadPeopleDict(path string) error {
 	}
 	reverse_dict = append(reverse_dict, "<unmatched>")
 	id.PeopleDict = dict
-	id.ReversePeopleDict = reverse_dict
+	id.ReversedPeopleDict = reverse_dict
 	return nil
 }
 
@@ -169,5 +193,9 @@ func (id *IdentityDetector) GeneratePeopleDict(commits []*object.Commit) {
 		reverse_dict[val] = strings.Join(names[val], "|") + "|" + strings.Join(emails[val], "|")
 	}
 	id.PeopleDict = dict
-	id.ReversePeopleDict = reverse_dict
+	id.ReversedPeopleDict = reverse_dict
+}
+
+// init makes IdentityDetector known to the global Registry under the keys
+// listed in its Provides().
+func init() {
+	Registry.Register(&IdentityDetector{})
 }

+ 12 - 12
identity_test.go

@@ -23,8 +23,8 @@ func fixtureIdentityDetector() *IdentityDetector {
 	reversePeopleDict := make([]string, 1)
 	reversePeopleDict[0] = "Vadim"
 	id := IdentityDetector{
-		PeopleDict:        peopleDict,
-		ReversePeopleDict: reversePeopleDict,
+		PeopleDict:         peopleDict,
+		ReversedPeopleDict: reversePeopleDict,
 	}
 	id.Initialize(testRepository)
 	return &id
@@ -66,11 +66,11 @@ func TestLoadPeopleDict(t *testing.T) {
 	assert.Contains(t, id.PeopleDict, "another@one.com")
 	assert.Contains(t, id.PeopleDict, "máximo cuadros")
 	assert.Contains(t, id.PeopleDict, "maximo@sourced.tech")
-	assert.Equal(t, len(id.ReversePeopleDict), 4)
-	assert.Equal(t, id.ReversePeopleDict[0], "Linus Torvalds")
-	assert.Equal(t, id.ReversePeopleDict[1], "Vadim Markovtsev")
-	assert.Equal(t, id.ReversePeopleDict[2], "Máximo Cuadros")
-	assert.Equal(t, id.ReversePeopleDict[3], "<unmatched>")
+	assert.Equal(t, len(id.ReversedPeopleDict), 4)
+	assert.Equal(t, id.ReversedPeopleDict[0], "Linus Torvalds")
+	assert.Equal(t, id.ReversedPeopleDict[1], "Vadim Markovtsev")
+	assert.Equal(t, id.ReversedPeopleDict[2], "Máximo Cuadros")
+	assert.Equal(t, id.ReversedPeopleDict[3], "<unmatched>")
 }
 
 /*
@@ -128,7 +128,7 @@ func TestGeneratePeopleDict(t *testing.T) {
 	}
 	id.GeneratePeopleDict(commits)
 	assert.True(t, len(id.PeopleDict) >= 7)
-	assert.True(t, len(id.ReversePeopleDict) >= 3)
+	assert.True(t, len(id.ReversedPeopleDict) >= 3)
 	assert.Equal(t, id.PeopleDict["vadim markovtsev"], 0)
 	assert.Equal(t, id.PeopleDict["vadim@sourced.tech"], 0)
 	assert.Equal(t, id.PeopleDict["gmarkhor@gmail.com"], 0)
@@ -136,9 +136,9 @@ func TestGeneratePeopleDict(t *testing.T) {
 	assert.Equal(t, id.PeopleDict["bzz@apache.org"], 1)
 	assert.Equal(t, id.PeopleDict["máximo cuadros"], 2)
 	assert.Equal(t, id.PeopleDict["mcuadros@gmail.com"], 2)
-	assert.Equal(t, id.ReversePeopleDict[0], "vadim markovtsev|gmarkhor@gmail.com|vadim@sourced.tech")
-	assert.Equal(t, id.ReversePeopleDict[1], "alexander bezzubov|bzz@apache.org")
-	assert.Equal(t, id.ReversePeopleDict[2], "máximo cuadros|mcuadros@gmail.com")
+	assert.Equal(t, id.ReversedPeopleDict[0], "vadim markovtsev|gmarkhor@gmail.com|vadim@sourced.tech")
+	assert.Equal(t, id.ReversedPeopleDict[1], "alexander bezzubov|bzz@apache.org")
+	assert.Equal(t, id.ReversedPeopleDict[2], "máximo cuadros|mcuadros@gmail.com")
 }
 
 func TestIdentityDetectorFinalize(t *testing.T) {
@@ -277,6 +277,6 @@ func TestGeneratePeopleDictMailmap(t *testing.T) {
 		"Strange Guy <vadim@sourced.tech>\nVadim Markovtsev <vadim@sourced.tech> Strange Guy <vadim@sourced.tech>")
 	commits = append(commits, fake)
 	id.GeneratePeopleDict(commits)
-	assert.Contains(t, id.ReversePeopleDict,
+	assert.Contains(t, id.ReversedPeopleDict,
 		"strange guy|vadim markovtsev|gmarkhor@gmail.com|vadim@sourced.tech")
 }

+ 164 - 13
pipeline.go

@@ -1,12 +1,14 @@
 package hercules
 
 import (
+	"bufio"
 	"errors"
 	"fmt"
 	"io"
+	"io/ioutil"
 	"os"
+	"reflect"
 
-	"bufio"
 	"gopkg.in/src-d/go-git.v4"
 	"gopkg.in/src-d/go-git.v4/plumbing"
 	"gopkg.in/src-d/go-git.v4/plumbing/object"
@@ -21,6 +23,9 @@ type PipelineItem interface {
 	Provides() []string
 	// Requires returns the list of keys of needed entities which must be supplied in Consume().
 	Requires() []string
+	// Construct performs the initial creation of the object by taking parameters from facts.
+	// It allows to create PipelineItems in a universal way.
+	Construct(facts map[string]interface{})
 	// Initialize prepares and resets the item. Consume() requires Initialize()
 	// to be called at least once beforehand.
 	Initialize(*git.Repository)
@@ -33,6 +38,44 @@ type PipelineItem interface {
 	Finalize() interface{}
 }
 
+// PipelineItemRegistry remembers which PipelineItem types provide which
+// entities, so that the items can be instantiated by the provided key.
+type PipelineItemRegistry struct {
+	// provided maps an entity key to the types of the items whose Provides()
+	// lists that key, in registration order.
+	provided map[string][]reflect.Type
+}
+
+// Register records the dynamic type of example under every key returned by
+// example.Provides(). The internal index is created on first use, so the zero
+// value of the registry is ready to use.
+func (registry *PipelineItemRegistry) Register(example PipelineItem) {
+	if registry.provided == nil {
+		registry.provided = map[string][]reflect.Type{}
+	}
+	itemType := reflect.TypeOf(example)
+	for _, key := range example.Provides() {
+		// append copes with the nil slice of a key seen for the first time.
+		registry.provided[key] = append(registry.provided[key], itemType)
+	}
+}
+
+// Summon instantiates every registered item type which provides the given key.
+// Each returned item is a freshly allocated zero value. The result is empty,
+// never nil, when nothing provides the key.
+func (registry *PipelineItemRegistry) Summon(provides string) []PipelineItem {
+	items := []PipelineItem{}
+	// Indexing a nil map yields a nil slice, so an empty registry needs no
+	// special case.
+	for _, itemType := range registry.provided[provides] {
+		// The registered types are pointers; allocate the pointed-to struct.
+		fresh := reflect.New(itemType.Elem()).Interface()
+		items = append(items, fresh.(PipelineItem))
+	}
+	return items
+}
+
+// Registry is the package-wide registry of pipeline item types.
+var Registry = new(PipelineItemRegistry)
+
+// wrappedPipelineItem is a tree node which pairs a pipeline item with the
+// nested nodes attached to it.
+type wrappedPipelineItem struct {
+	// Item is the wrapped pipeline item.
+	Item PipelineItem
+	// Children are the nodes nested under Item.
+	Children []wrappedPipelineItem
+}
+
 type Pipeline struct {
 	// OnProgress is the callback which is invoked in Analyse() to output it's
 	// progress. The first argument is the number of processed commits and the
@@ -47,19 +90,48 @@ type Pipeline struct {
 
 	// plan is the resolved execution sequence.
 	plan []PipelineItem
+
+	// the collection of parameters to create items.
+	facts map[string]interface{}
 }
 
 func NewPipeline(repository *git.Repository) *Pipeline {
 	return &Pipeline{repository: repository, items: []PipelineItem{}, plan: []PipelineItem{}}
 }
 
-func (pipeline *Pipeline) AddItem(item PipelineItem) {
-	for _, reg := range pipeline.items {
-		if reg == item {
-			return
-		}
+// GetFact returns the value stored under name in the pipeline's facts, or nil
+// if there is no such fact.
+func (pipeline *Pipeline) GetFact(name string) interface{} {
+	value := pipeline.facts[name]
+	return value
+}
+
+// SetFact stores value under name in the pipeline's facts, replacing any
+// previous value.
+func (pipeline *Pipeline) SetFact(name string, value interface{}) {
+	// NewPipeline leaves facts nil and assigning into a nil map panics, so the
+	// map is created on first use.
+	if pipeline.facts == nil {
+		pipeline.facts = map[string]interface{}{}
+	}
+	pipeline.facts[name] = value
+}
+
+// DeployItem adds item to the pipeline together with the items it depends on.
+// The dependencies are walked breadth-first: for every key an item requires,
+// each registered provider of that key is summoned from Registry and added,
+// at most once per Name() within this call. It returns item.
+func (pipeline *Pipeline) DeployItem(item PipelineItem) PipelineItem {
+	queue := []PipelineItem{item}
+	// added is keyed by the item name to avoid summoning the same provider twice.
+	added := map[string]PipelineItem{item.Name(): item}
+	pipeline.AddItem(item)
+	for len(queue) > 0 {
+		head := queue[0]
+		queue = queue[1:]
+		for _, dep := range head.Requires() {
+			for _, sibling := range Registry.Summon(dep) {
+				if _, exists := added[sibling.Name()]; exists {
+					continue
+				}
+				added[sibling.Name()] = sibling
+				queue = append(queue, sibling)
+				pipeline.AddItem(sibling)
+			}
+		}
+	}
+	return item
+}
+
+func (pipeline *Pipeline) AddItem(item PipelineItem) PipelineItem {
 	pipeline.items = append(pipeline.items, item)
+	return item
 }
 
 func (pipeline *Pipeline) RemoveItem(item PipelineItem) {
@@ -99,28 +171,98 @@ func (pipeline *Pipeline) Commits() []*object.Commit {
 	return result
 }
 
-func (pipeline *Pipeline) Initialize() {
+func (pipeline *Pipeline) Initialize(facts map[string]interface{}) {
 	graph := toposort.NewGraph()
 	name2item := map[string]PipelineItem{}
-	for index, item := range pipeline.items {
-		name := fmt.Sprintf("%s_%d", item.Name(), index)
+	ambiguousMap := map[string][]string{}
+	nameUsages := map[string]int{}
+	for _, item := range pipeline.items {
+		nameUsages[item.Name()]++
+	}
+	counters := map[string]int{}
+	for _, item := range pipeline.items {
+		name := item.Name()
+		if nameUsages[name] > 1 {
+			index := counters[item.Name()] + 1
+			counters[item.Name()] = index
+			name = fmt.Sprintf("%s_%d", item.Name(), index)
+		}
 		graph.AddNode(name)
 		name2item[name] = item
 		for _, key := range item.Provides() {
 			key = "[" + key + "]"
 			graph.AddNode(key)
-			graph.AddEdge(name, key)
+			if graph.AddEdge(name, key) > 1 {
+				if ambiguousMap[key] != nil {
+					panic("Failed to resolve pipeline dependencies.")
+				}
+				ambiguousMap[key] = graph.FindParents(key)
+			}
 		}
 	}
-	for index, item := range pipeline.items {
-		name := fmt.Sprintf("%s_%d", item.Name(), index)
+	counters = map[string]int{}
+	for _, item := range pipeline.items {
+		name := item.Name()
+		if nameUsages[name] > 1 {
+			index := counters[item.Name()] + 1
+			counters[item.Name()] = index
+			name = fmt.Sprintf("%s_%d", item.Name(), index)
+		}
 		for _, key := range item.Requires() {
 			key = "[" + key + "]"
-			if !graph.AddEdge(key, name) {
+			if graph.AddEdge(key, name) == 0 {
 				panic(fmt.Sprintf("Unsatisfied dependency: %s -> %s", key, item.Name()))
 			}
 		}
 	}
+	if len(ambiguousMap) > 0 {
+		ambiguous := []string{}
+		for key := range ambiguousMap {
+			ambiguous = append(ambiguous, key)
+		}
+		bfsorder := graph.BreadthSort()
+		bfsindex := map[string]int{}
+		for i, s := range bfsorder {
+			bfsindex[s] = i
+		}
+		for len(ambiguous) > 0 {
+			key := ambiguous[0]
+			ambiguous = ambiguous[1:]
+			pair := ambiguousMap[key]
+			inheritor := pair[1]
+			if bfsindex[pair[1]] < bfsindex[pair[0]] {
+				inheritor = pair[0]
+			}
+			removed := graph.RemoveEdge(key, inheritor)
+			cycle := map[string]bool{}
+			for _, node := range graph.FindCycle(key) {
+				cycle[node] = true
+			}
+			if len(cycle) == 0 {
+				cycle[inheritor] = true
+			}
+			if removed {
+				graph.AddEdge(key, inheritor)
+			}
+			graph.RemoveEdge(inheritor, key)
+			graph.ReindexNode(inheritor)
+			// for all nodes key links to except those in cycle, put the link from inheritor
+			for _, node := range graph.FindChildren(key) {
+				if _, exists := cycle[node]; !exists {
+					graph.AddEdge(inheritor, node)
+					graph.RemoveEdge(key, node)
+				}
+			}
+			graph.ReindexNode(key)
+		}
+	}
+	if dumpPath, exists := facts["Pipeline.DumpPath"].(string); exists {
+		ioutil.WriteFile(dumpPath, []byte(graph.Serialize([]string{})), 0666)
+	}
+	var graphCopy *toposort.Graph
+	if _, exists := facts["Pipeline.DumpPath"].(string); exists {
+		graphCopy = graph.Copy()
+	}
 	strplan, ok := graph.Toposort()
 	if !ok {
 		panic("Failed to resolve pipeline dependencies.")
@@ -134,6 +276,15 @@ func (pipeline *Pipeline) Initialize() {
 	if len(pipeline.plan) != len(pipeline.items) {
 		panic("Internal pipeline dependency resolution error.")
 	}
+	if dumpPath, exists := facts["Pipeline.DumpPath"].(string); exists {
+		ioutil.WriteFile(dumpPath, []byte(graphCopy.Serialize(strplan)), 0666)
+	}
+	if dryRun, exists := facts["Pipeline.DryRun"].(bool); exists && dryRun {
+		return
+	}
+	for _, item := range pipeline.items {
+		item.Construct(facts)
+	}
 	for _, item := range pipeline.items {
 		item.Initialize(pipeline.repository)
 	}

+ 49 - 3
pipeline_test.go

@@ -12,6 +12,7 @@ import (
 	"gopkg.in/src-d/go-git.v4/plumbing"
 	"gopkg.in/src-d/go-git.v4/plumbing/object"
 	"gopkg.in/src-d/go-git.v4/storage/memory"
+	"path"
 )
 
 type testPipelineItem struct {
@@ -35,6 +36,9 @@ func (item *testPipelineItem) Requires() []string {
 	return []string{}
 }
 
+// Construct does nothing: the test item takes no parameters from facts.
+func (item *testPipelineItem) Construct(facts map[string]interface{}) {}
+
 func (item *testPipelineItem) Initialize(repository *git.Repository) {
 	item.Initialized = repository != nil
 }
@@ -80,6 +84,9 @@ func (item *dependingTestPipelineItem) Requires() []string {
 	return arr[:]
 }
 
+// Construct does nothing: the test item takes no parameters from facts.
+func (item *dependingTestPipelineItem) Construct(facts map[string]interface{}) {}
+
 func (item *dependingTestPipelineItem) Initialize(repository *git.Repository) {
 }
 
@@ -101,7 +108,7 @@ func TestPipelineRun(t *testing.T) {
 	pipeline := NewPipeline(testRepository)
 	item := &testPipelineItem{}
 	pipeline.AddItem(item)
-	pipeline.Initialize()
+	pipeline.Initialize(map[string]interface{}{})
 	assert.True(t, item.Initialized)
 	commits := make([]*object.Commit, 1)
 	commits[0], _ = testRepository.CommitObject(plumbing.NewHash(
@@ -193,7 +200,7 @@ func TestPipelineDeps(t *testing.T) {
 	item2 := &testPipelineItem{}
 	pipeline.AddItem(item1)
 	pipeline.AddItem(item2)
-	pipeline.Initialize()
+	pipeline.Initialize(map[string]interface{}{})
 	commits := make([]*object.Commit, 1)
 	commits[0], _ = testRepository.CommitObject(plumbing.NewHash(
 		"af9ddc0db70f09f3f27b4b98e415592a7485171c"))
@@ -209,7 +216,7 @@ func TestPipelineError(t *testing.T) {
 	item := &testPipelineItem{}
 	item.TestError = true
 	pipeline.AddItem(item)
-	pipeline.Initialize()
+	pipeline.Initialize(map[string]interface{}{})
 	commits := make([]*object.Commit, 1)
 	commits[0], _ = testRepository.CommitObject(plumbing.NewHash(
 		"af9ddc0db70f09f3f27b4b98e415592a7485171c"))
@@ -218,6 +225,45 @@ func TestPipelineError(t *testing.T) {
 	assert.NotNil(t, err)
 }
 
+// TestPipelineSerialize deploys BurndownAnalysis with all its dependencies,
+// performs a dry run of Initialize() and compares the dumped dependency graph
+// with the expected Graphviz text.
+func TestPipelineSerialize(t *testing.T) {
+	pipeline := NewPipeline(testRepository)
+	pipeline.DeployItem(&BurndownAnalysis{})
+	facts := map[string]interface{}{}
+	facts["Pipeline.DryRun"] = true
+	tmpdir, err := ioutil.TempDir("", "hercules-")
+	if err != nil {
+		// Without the directory the comparison below would only report a
+		// misleading mismatch against an empty string.
+		t.Fatal(err)
+	}
+	defer os.RemoveAll(tmpdir)
+	dotpath := path.Join(tmpdir, "graph.dot")
+	facts["Pipeline.DumpPath"] = dotpath
+	pipeline.Initialize(facts)
+	bdot, err := ioutil.ReadFile(dotpath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	dot := string(bdot)
+	assert.Equal(t, `digraph Hercules {
+  "6 BlobCache" -> "7 [blob_cache]"
+  "0 DaysSinceStart" -> "3 [day]"
+  "10 FileDiff" -> "12 [file_diff]"
+  "15 FileDiffRefiner" -> "16 Burndown"
+  "1 IdentityDetector" -> "4 [author]"
+  "8 RenameAnalysis" -> "16 Burndown"
+  "8 RenameAnalysis" -> "10 FileDiff"
+  "8 RenameAnalysis" -> "9 UAST"
+  "8 RenameAnalysis" -> "13 UASTChanges"
+  "2 TreeDiff" -> "5 [changes]"
+  "9 UAST" -> "11 [uasts]"
+  "13 UASTChanges" -> "14 [changed_uasts]"
+  "4 [author]" -> "16 Burndown"
+  "7 [blob_cache]" -> "16 Burndown"
+  "7 [blob_cache]" -> "10 FileDiff"
+  "7 [blob_cache]" -> "8 RenameAnalysis"
+  "7 [blob_cache]" -> "9 UAST"
+  "14 [changed_uasts]" -> "15 FileDiffRefiner"
+  "5 [changes]" -> "6 BlobCache"
+  "5 [changes]" -> "8 RenameAnalysis"
+  "3 [day]" -> "16 Burndown"
+  "12 [file_diff]" -> "15 FileDiffRefiner"
+  "11 [uasts]" -> "13 UASTChanges"
+}`, dot)
+}
+
 func init() {
 	cwd, err := os.Getwd()
 	if err == nil {

+ 12 - 2
renames.go

@@ -25,7 +25,7 @@ func (ra *RenameAnalysis) Name() string {
 }
 
 func (ra *RenameAnalysis) Provides() []string {
-	arr := [...]string{"renamed_changes"}
+	arr := [...]string{"changes"}
 	return arr[:]
 }
 
@@ -34,6 +34,12 @@ func (ra *RenameAnalysis) Requires() []string {
 	return arr[:]
 }
 
+// Construct configures the analysis from facts. The only key consulted is
+// "RenameAnalysis.SimilarityThreshold"; it is applied when it is present and
+// holds an int, otherwise the current threshold is kept.
+func (ra *RenameAnalysis) Construct(facts map[string]interface{}) {
+	threshold, ok := facts["RenameAnalysis.SimilarityThreshold"].(int)
+	if !ok {
+		return
+	}
+	ra.SimilarityThreshold = threshold
+}
+
 func (ra *RenameAnalysis) Initialize(repository *git.Repository) {
 	if ra.SimilarityThreshold < 0 || ra.SimilarityThreshold > 100 {
 		panic("hercules.RenameAnalysis: an invalid SimilarityThreshold was specified")
@@ -149,7 +155,7 @@ func (ra *RenameAnalysis) Consume(deps map[string]interface{}) (map[string]inter
 	for _, blob := range deleted_blobs {
 		reduced_changes = append(reduced_changes, blob.change)
 	}
-	return map[string]interface{}{"renamed_changes": reduced_changes}, nil
+	return map[string]interface{}{"changes": reduced_changes}, nil
 }
 
 func (ra *RenameAnalysis) Finalize() interface{} {
@@ -233,3 +239,7 @@ func (slice sortableBlobs) Less(i, j int) bool {
 func (slice sortableBlobs) Swap(i, j int) {
 	slice[i], slice[j] = slice[j], slice[i]
 }
+
+// init makes RenameAnalysis known to the global Registry under the keys
+// listed in its Provides().
+func init() {
+	Registry.Register(&RenameAnalysis{})
+}

+ 3 - 3
renames_test.go

@@ -17,7 +17,7 @@ func TestRenameAnalysisMeta(t *testing.T) {
 	ra := fixtureRenameAnalysis()
 	assert.Equal(t, ra.Name(), "RenameAnalysis")
 	assert.Equal(t, len(ra.Provides()), 1)
-	assert.Equal(t, ra.Provides()[0], "renamed_changes")
+	assert.Equal(t, ra.Provides()[0], "changes")
 	assert.Equal(t, len(ra.Requires()), 2)
 	assert.Equal(t, ra.Requires()[0], "blob_cache")
 	assert.Equal(t, ra.Requires()[1], "changes")
@@ -101,12 +101,12 @@ func TestRenameAnalysisConsume(t *testing.T) {
 	ra.SimilarityThreshold = 33
 	res, err := ra.Consume(deps)
 	assert.Nil(t, err)
-	renamed := res["renamed_changes"].(object.Changes)
+	renamed := res["changes"].(object.Changes)
 	assert.Equal(t, len(renamed), 2)
 	ra.SimilarityThreshold = 35
 	res, err = ra.Consume(deps)
 	assert.Nil(t, err)
-	renamed = res["renamed_changes"].(object.Changes)
+	renamed = res["changes"].(object.Changes)
 	assert.Equal(t, len(renamed), 3)
 }
 

+ 166 - 13
toposort/toposort.go

@@ -1,25 +1,47 @@
 package toposort
 
-// Copied from https://github.com/philopon/go-toposort
+import (
+	"bytes"
+	"fmt"
+	"sort"
+)
+
+// Reworked from https://github.com/philopon/go-toposort
 
 type Graph struct {
-	nodes   []string
+	// Outgoing connections for every node.
 	outputs map[string]map[string]int
+	// How many parents each node has.
 	inputs  map[string]int
 }
 
+// NewGraph initializes a new Graph.
 func NewGraph() *Graph {
 	return &Graph{
-		nodes:   []string{},
 		inputs:  map[string]int{},
 		outputs: map[string]map[string]int{},
 	}
 }
 
-func (g *Graph) AddNode(name string) bool {
-	g.nodes = append(g.nodes, name)
+// Copy returns a deep clone of the graph: the parent counters and every
+// per-node map of outgoing edges are duplicated, so modifying the clone does
+// not affect the original.
+func (g *Graph) Copy() *Graph {
+	clone := NewGraph()
+	for node, parents := range g.inputs {
+		clone.inputs[node] = parents
+	}
+	for from, children := range g.outputs {
+		copied := make(map[string]int, len(children))
+		for to, index := range children {
+			copied[to] = index
+		}
+		clone.outputs[from] = copied
+	}
+	return clone
+}
 
-	if _, ok := g.outputs[name]; ok {
+// AddNode inserts a new node into the graph.
+func (g *Graph) AddNode(name string) bool {
+	if _, exists := g.outputs[name]; exists {
 		return false
 	}
 	g.outputs[name] = make(map[string]int)
@@ -27,6 +49,7 @@ func (g *Graph) AddNode(name string) bool {
 	return true
 }
 
+// AddNodes inserts multiple nodes into the graph at once.
 func (g *Graph) AddNodes(names ...string) bool {
 	for _, name := range names {
 		if ok := g.AddNode(name); !ok {
@@ -36,16 +59,31 @@ func (g *Graph) AddNodes(names ...string) bool {
 	return true
 }
 
-func (g *Graph) AddEdge(from, to string) bool {
+// AddEdge inserts the link from "from" node to "to" node.
+func (g *Graph) AddEdge(from, to string) int {
 	m, ok := g.outputs[from]
 	if !ok {
-		return false
+		return 0
 	}
 
 	m[to] = len(m) + 1
-	g.inputs[to]++
+	ni := g.inputs[to] + 1
+	g.inputs[to] = ni
 
-	return true
+	return ni
+}
+
+// ReindexNode updates the internal representation of the node after edge removals.
+// The outgoing edges of node are renumbered 1..n without gaps. Unknown nodes
+// are ignored.
+func (g *Graph) ReindexNode(node string) {
+	children, ok := g.outputs[node]
+	if !ok {
+		return
+	}
+	// Map iteration order is random; number the children in sorted order so
+	// that the same graph always gets the same indexes.
+	keys := make([]string, 0, len(children))
+	for key := range children {
+		keys = append(keys, key)
+	}
+	sort.Strings(keys)
+	for i, key := range keys {
+		children[key] = i + 1
+	}
 }
 
 func (g *Graph) unsafeRemoveEdge(from, to string) {
@@ -53,6 +91,8 @@ func (g *Graph) unsafeRemoveEdge(from, to string) {
 	g.inputs[to]--
 }
 
+// RemoveEdge deletes the link from "from" node to "to" node.
+// Call ReindexNode(from) after you finish modifying the edges.
 func (g *Graph) RemoveEdge(from, to string) bool {
 	if _, ok := g.outputs[from]; !ok {
 		return false
@@ -61,15 +101,17 @@ func (g *Graph) RemoveEdge(from, to string) bool {
 	return true
 }
 
+// Toposort sorts the nodes in the graph in topological order.
 func (g *Graph) Toposort() ([]string, bool) {
-	L := make([]string, 0, len(g.nodes))
-	S := make([]string, 0, len(g.nodes))
+	L := make([]string, 0, len(g.outputs))
+	S := make([]string, 0, len(g.outputs))
 
-	for _, n := range g.nodes {
+	for n := range g.outputs {
 		if g.inputs[n] == 0 {
 			S = append(S, n)
 		}
 	}
+	sort.Strings(S)
 
 	for len(S) > 0 {
 		var n string
@@ -101,3 +143,114 @@ func (g *Graph) Toposort() ([]string, bool) {
 
 	return L, true
 }
+
+// BreadthSort returns the nodes reachable from the roots (the nodes without
+// incoming edges) in breadth-first order. Roots and the children of every
+// visited node are processed in alphabetical order, so the result is the same
+// on every run. Nodes which cannot be reached from a root are not returned.
+func (g *Graph) BreadthSort() []string {
+	L := make([]string, 0, len(g.outputs))
+	S := make([]string, 0, len(g.outputs))
+
+	for n := range g.outputs {
+		if g.inputs[n] == 0 {
+			S = append(S, n)
+		}
+	}
+	// Map iteration order is random; sort the roots as Toposort does.
+	sort.Strings(S)
+
+	visited := make(map[string]bool, len(g.outputs))
+	for len(S) > 0 {
+		node := S[0]
+		S = S[1:]
+		if visited[node] {
+			continue
+		}
+		visited[node] = true
+		L = append(L, node)
+		children := make([]string, 0, len(g.outputs[node]))
+		for child := range g.outputs[node] {
+			children = append(children, child)
+		}
+		sort.Strings(children)
+		S = append(S, children...)
+	}
+
+	return L
+}
+
+// FindCycle searches breadth-first from "seed" for a path which leads back to
+// it. The returned slice lists the nodes of that cycle, starting with "seed"
+// and following the edge direction. It is empty when no path returns to "seed".
+func (g *Graph) FindCycle(seed string) []string {
+	type step struct {
+		node   string
+		parent string
+	}
+	queue := make([]step, 0, len(g.outputs))
+	queue = append(queue, step{seed, ""})
+	// cameFrom maps every discovered node to the node it was first reached from.
+	// The seed starts with "" so that it is accepted again when an edge returns to it.
+	cameFrom := map[string]string{}
+	for len(queue) > 0 {
+		current := queue[0]
+		queue = queue[1:]
+		if previous, seen := cameFrom[current.node]; !seen || previous == "" {
+			cameFrom[current.node] = current.parent
+			for next := range g.outputs[current.node] {
+				queue = append(queue, step{next, current.node})
+			}
+		}
+		if current.node != seed || current.parent == "" {
+			continue
+		}
+		// An edge came back to the seed: follow the parent links until the seed.
+		cycle := []string{}
+		for hop := current.parent; hop != seed; hop = cameFrom[hop] {
+			cycle = append(cycle, hop)
+		}
+		cycle = append(cycle, seed)
+		// The nodes were collected backwards; flip them in place.
+		for i, j := 0, len(cycle)-1; i < j; i, j = i+1, j-1 {
+			cycle[i], cycle[j] = cycle[j], cycle[i]
+		}
+		return cycle
+	}
+	return []string{}
+}
+
+// FindParents returns the nodes which have an outgoing edge to "to".
+// The names are sorted because map iteration order is random and would
+// otherwise make the result differ between runs. The slice is empty, not nil,
+// when there are no such nodes.
+func (g *Graph) FindParents(to string) []string {
+	result := []string{}
+	for node, children := range g.outputs {
+		if _, exists := children[to]; exists {
+			result = append(result, node)
+		}
+	}
+	sort.Strings(result)
+	return result
+}
+
+// FindChildren returns the nodes which "from" has an outgoing edge to.
+// The names are sorted because map iteration order is random and would
+// otherwise make the result differ between runs. The slice is empty, not nil,
+// when "from" is unknown or has no outgoing edges.
+func (g *Graph) FindChildren(from string) []string {
+	result := make([]string, 0, len(g.outputs[from]))
+	for child := range g.outputs[from] {
+		result = append(result, child)
+	}
+	sort.Strings(result)
+	return result
+}
+
+// Serialize renders the graph in Graphviz "dot" format.
+// Every node is written as "<index> <name>", where the index is the position
+// of the node in "sorted"; a node which is missing from "sorted" gets index 0.
+// Edges are emitted ordered by source name and then by target name, so the
+// text is the same on every run.
+func (g *Graph) Serialize(sorted []string) string {
+	node2index := make(map[string]int, len(sorted))
+	for index, node := range sorted {
+		node2index[node] = index
+	}
+	var buffer bytes.Buffer
+	buffer.WriteString("digraph Hercules {\n")
+	nodesFrom := make([]string, 0, len(g.outputs))
+	for nodeFrom := range g.outputs {
+		nodesFrom = append(nodesFrom, nodeFrom)
+	}
+	sort.Strings(nodesFrom)
+	for _, nodeFrom := range nodesFrom {
+		links := make([]string, 0, len(g.outputs[nodeFrom]))
+		for nodeTo := range g.outputs[nodeFrom] {
+			links = append(links, nodeTo)
+		}
+		sort.Strings(links)
+		for _, nodeTo := range links {
+			fmt.Fprintf(&buffer, "  \"%d %s\" -> \"%d %s\"\n",
+				node2index[nodeFrom], nodeFrom, node2index[nodeTo], nodeTo)
+		}
+	}
+	buffer.WriteString("}")
+	return buffer.String()
+}

+ 6 - 0
tree_diff.go

@@ -24,6 +24,8 @@ func (treediff *TreeDiff) Requires() []string {
 	return []string{}
 }
 
+// Construct accepts the configuration facts but reads none of them: the body is empty.
+func (treediff *TreeDiff) Construct(facts map[string]interface{}) {}
+
 func (treediff *TreeDiff) Initialize(repository *git.Repository) {
 	treediff.previousTree = nil
 }
@@ -70,3 +72,7 @@ func (treediff *TreeDiff) Consume(deps map[string]interface{}) (map[string]inter
 func (treediff *TreeDiff) Finalize() interface{} {
 	return nil
 }
+
+// init passes a TreeDiff instance to Registry.Register when the package is loaded.
+func init() {
+	Registry.Register(&TreeDiff{})
+}

+ 34 - 6
uast.go

@@ -3,6 +3,8 @@ package hercules
 import (
 	"context"
 	"errors"
+	"fmt"
+	"os"
 	"runtime"
 	"strings"
 	"sync"
@@ -15,8 +17,6 @@ import (
 	"gopkg.in/src-d/go-git.v4/plumbing"
 	"gopkg.in/src-d/go-git.v4/plumbing/object"
 	"gopkg.in/src-d/go-git.v4/utils/merkletrie"
-	"fmt"
-	"os"
 )
 
 type UASTExtractor struct {
@@ -64,10 +64,28 @@ func (exr *UASTExtractor) Provides() []string {
 }
 
 func (exr *UASTExtractor) Requires() []string {
-	arr := [...]string{"renamed_changes", "blob_cache"}
+	arr := [...]string{"changes", "blob_cache"}
 	return arr[:]
 }
 
+// Construct applies the configuration found in "facts". A key takes effect only
+// when it is present and holds the expected type; otherwise the field is left
+// as is. Recognized keys: "UAST.Endpoint" (string), "UAST.Context"
+// (func() context.Context), "UAST.PoolSize" (int), "UAST.Extensions"
+// (map[string]bool) and "UAST.FailOnErrors" (bool).
+func (exr *UASTExtractor) Construct(facts map[string]interface{}) {
+	if endpoint, ok := facts["UAST.Endpoint"].(string); ok {
+		exr.Endpoint = endpoint
+	}
+	if makeContext, ok := facts["UAST.Context"].(func() context.Context); ok {
+		exr.Context = makeContext
+	}
+	if poolSize, ok := facts["UAST.PoolSize"].(int); ok {
+		exr.PoolSize = poolSize
+	}
+	if extensions, ok := facts["UAST.Extensions"].(map[string]bool); ok {
+		exr.Extensions = extensions
+	}
+	if failOnErrors, ok := facts["UAST.FailOnErrors"].(bool); ok {
+		exr.FailOnErrors = failOnErrors
+	}
+}
+
 func (exr *UASTExtractor) Initialize(repository *git.Repository) {
 	if exr.Context == nil {
 		exr.Context = func() context.Context { return context.Background() }
@@ -100,7 +118,7 @@ func (exr *UASTExtractor) Initialize(repository *git.Repository) {
 
 func (exr *UASTExtractor) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
 	cache := deps["blob_cache"].(map[plumbing.Hash]*object.Blob)
-	treeDiffs := deps["renamed_changes"].(object.Changes)
+	treeDiffs := deps["changes"].(object.Changes)
 	uasts := map[plumbing.Hash]*uast.Node{}
 	lock := sync.RWMutex{}
 	errs := make([]error, 0)
@@ -220,17 +238,19 @@ func (uc *UASTChanges) Provides() []string {
 }
 
 func (uc *UASTChanges) Requires() []string {
-	arr := [...]string{"uasts", "renamed_changes"}
+	arr := [...]string{"uasts", "changes"}
 	return arr[:]
 }
 
+// Construct accepts the configuration facts but reads none of them: the body is empty.
+func (uc *UASTChanges) Construct(facts map[string]interface{}) {}
+
 func (uc *UASTChanges) Initialize(repository *git.Repository) {
 	uc.cache = map[plumbing.Hash]*uast.Node{}
 }
 
 func (uc *UASTChanges) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
   uasts := deps["uasts"].(map[plumbing.Hash]*uast.Node)
-	treeDiffs := deps["renamed_changes"].(object.Changes)
+	treeDiffs := deps["changes"].(object.Changes)
 	commit := make([]UASTChange, 0, len(treeDiffs))
 	for _, change := range treeDiffs {
 		action, err := change.Action()
@@ -280,6 +300,8 @@ func (saver *UASTChangesSaver) Requires() []string {
 	return arr[:]
 }
 
+// Construct accepts the configuration facts but reads none of them: the body is empty.
+func (saver *UASTChangesSaver) Construct(facts map[string]interface{}) {}
+
 func (saver *UASTChangesSaver) Initialize(repository *git.Repository) {
 	saver.result = [][]UASTChange{}
 }
@@ -293,3 +315,9 @@ func (saver *UASTChangesSaver) Consume(deps map[string]interface{}) (map[string]
 func (saver *UASTChangesSaver) Finalize() interface{} {
 	return saver.result
 }
+
+// init passes one instance of every UAST pipeline item declared in this file
+// to Registry.Register when the package is loaded.
+func init() {
+	Registry.Register(&UASTExtractor{})
+	Registry.Register(&UASTChanges{})
+	Registry.Register(&UASTChangesSaver{})
+}

+ 1 - 1
uast_test.go

@@ -6,7 +6,7 @@ import (
 	"github.com/stretchr/testify/assert"
 	"gopkg.in/src-d/go-git.v4/plumbing/object"
 	"gopkg.in/src-d/go-git.v4/plumbing"
-	"gopkg.in/bblfsh/sdk.v0/uast"
+	"gopkg.in/bblfsh/sdk.v1/uast"
 )
 
 func fixtureUASTExtractor() *UASTExtractor {