瀏覽代碼

Refactor cmdline args

Vadim Markovtsev 7 年之前
父節點
當前提交
dcc0342e93
共有 20 個文件被更改,包括 504 次插入和 293 次删除
  1. 19 8
      blob_cache.go
  2. 4 9
      blob_cache_test.go
  3. 57 8
      burndown.go
  4. 77 98
      cmd/hercules/main.go
  5. 7 3
      couples.go
  6. 6 6
      day.go
  7. 0 6
      day_test.go
  8. 6 6
      diff.go
  9. 8 8
      diff_refiner.go
  10. 0 6
      diff_test.go
  11. 30 14
      identity.go
  12. 0 6
      identity_test.go
  13. 153 38
      pipeline.go
  14. 2 2
      pipeline_test.go
  15. 22 8
      renames.go
  16. 0 6
      renames_test.go
  17. 6 6
      tree_diff.go
  18. 0 6
      tree_diff_test.go
  19. 104 41
      uast.go
  20. 3 8
      uast_test.go

+ 19 - 8
blob_cache.go

@@ -15,9 +15,13 @@ type BlobCache struct {
 	IgnoreMissingSubmodules bool
 	IgnoreMissingSubmodules bool
 
 
 	repository *git.Repository
 	repository *git.Repository
-	cache map[plumbing.Hash]*object.Blob
+	cache      map[plumbing.Hash]*object.Blob
 }
 }
 
 
+const (
+	ConfigBlobCacheIgnoreMissingSubmodules = "BlobCache.IgnoreMissingSubmodules"
+)
+
 func (cache *BlobCache) Name() string {
 func (cache *BlobCache) Name() string {
 	return "BlobCache"
 	return "BlobCache"
 }
 }
@@ -32,8 +36,19 @@ func (cache *BlobCache) Requires() []string {
 	return arr[:]
 	return arr[:]
 }
 }
 
 
-func (cache *BlobCache) Construct(facts map[string]interface{}) {
-	if val, exists := facts["BlobCache.IgnoreMissingSubmodules"].(bool); exists {
+func (cache *BlobCache) ListConfigurationOptions() []ConfigurationOption {
+	options := [...]ConfigurationOption{{
+		Name: ConfigBlobCacheIgnoreMissingSubmodules,
+		Description: "Specifies whether to panic if some submodules do not exist and thus " +
+			"the corresponding Git objects cannot be loaded.",
+		Flag:    "ignore-missing-submodules",
+		Type:    BoolConfigurationOption,
+		Default: false}}
+	return options[:]
+}
+
+func (cache *BlobCache) Configure(facts map[string]interface{}) {
+	if val, exists := facts[ConfigBlobCacheIgnoreMissingSubmodules].(bool); exists {
 		cache.IgnoreMissingSubmodules = val
 		cache.IgnoreMissingSubmodules = val
 	}
 	}
 }
 }
@@ -103,10 +118,6 @@ func (self *BlobCache) Consume(deps map[string]interface{}) (map[string]interfac
 	return map[string]interface{}{"blob_cache": cache}, nil
 	return map[string]interface{}{"blob_cache": cache}, nil
 }
 }
 
 
-func (cache *BlobCache) Finalize() interface{} {
-	return nil
-}
-
 type FileGetter func(path string) (*object.File, error)
 type FileGetter func(path string) (*object.File, error)
 
 
 func (cache *BlobCache) getBlob(entry *object.ChangeEntry, fileGetter FileGetter) (
 func (cache *BlobCache) getBlob(entry *object.ChangeEntry, fileGetter FileGetter) (
@@ -147,5 +158,5 @@ func (cache *BlobCache) getBlob(entry *object.ChangeEntry, fileGetter FileGetter
 }
 }
 
 
 func init() {
 func init() {
-  Registry.Register(&BlobCache{})
+	Registry.Register(&BlobCache{})
 }
 }

+ 4 - 9
blob_cache_test.go

@@ -184,10 +184,10 @@ func TestBlobCacheConsumeBadHashes(t *testing.T) {
 	assert.Nil(t, err)
 	assert.Nil(t, err)
 	changes[0] = &object.Change{From: object.ChangeEntry{},
 	changes[0] = &object.Change{From: object.ChangeEntry{},
 		To: object.ChangeEntry{
 		To: object.ChangeEntry{
-		Name:      "labours.py",
-		Tree:      treeTo,
-		TreeEntry: object.TreeEntry{},
-	}}
+			Name:      "labours.py",
+			Tree:      treeTo,
+			TreeEntry: object.TreeEntry{},
+		}}
 	result, err = fixtureBlobCache().Consume(deps)
 	result, err = fixtureBlobCache().Consume(deps)
 	assert.Nil(t, result)
 	assert.Nil(t, result)
 	assert.NotNil(t, err)
 	assert.NotNil(t, err)
@@ -222,11 +222,6 @@ func TestBlobCacheConsumeInvalidHash(t *testing.T) {
 	assert.NotNil(t, err)
 	assert.NotNil(t, err)
 }
 }
 
 
-func TestBlobCacheFinalize(t *testing.T) {
-	outcome := fixtureBlobCache().Finalize()
-	assert.Nil(t, outcome)
-}
-
 func TestBlobCacheGetBlob(t *testing.T) {
 func TestBlobCacheGetBlob(t *testing.T) {
 	cache := fixtureBlobCache()
 	cache := fixtureBlobCache()
 	treeFrom, _ := testRepository.TreeObject(plumbing.NewHash(
 	treeFrom, _ := testRepository.TreeObject(plumbing.NewHash(

+ 57 - 8
burndown.go

@@ -69,6 +69,14 @@ type BurndownResult struct {
 	PeopleMatrix    [][]int64
 	PeopleMatrix    [][]int64
 }
 }
 
 
+const (
+	ConfigBurndownGranularity = "Burndown.Granularity"
+	ConfigBurndownSampling    = "Burndown.Sampling"
+	ConfigBurndownTrackFiles  = "Burndown.TrackFiles"
+	ConfigBurndownTrackPeople = "Burndown.TrackPeople"
+	ConfigBurndownDebug       = "Burndown.Debug"
+)
+
 func (analyser *BurndownAnalysis) Name() string {
 func (analyser *BurndownAnalysis) Name() string {
 	return "Burndown"
 	return "Burndown"
 }
 }
@@ -82,25 +90,66 @@ func (analyser *BurndownAnalysis) Requires() []string {
 	return arr[:]
 	return arr[:]
 }
 }
 
 
-func (analyser *BurndownAnalysis) Construct(facts map[string]interface{}) {
-	if val, exists := facts["Burndown.Granularity"].(int); exists {
+func (analyser *BurndownAnalysis) ListConfigurationOptions() []ConfigurationOption {
+	options := [...]ConfigurationOption{{
+		Name:        ConfigBurndownGranularity,
+		Description: "How many days there are in a single band.",
+		Flag:        "granularity",
+		Type:        IntConfigurationOption,
+		Default:     30}, {
+		Name:        ConfigBurndownSampling,
+		Description: "How frequently to record the state in days.",
+		Flag:        "sampling",
+		Type:        IntConfigurationOption,
+		Default:     30}, {
+		Name:        ConfigBurndownTrackFiles,
+		Description: "Record detailed statistics per each file.",
+		Flag:        "burndown-files",
+		Type:        BoolConfigurationOption,
+		Default:     false}, {
+		Name:        ConfigBurndownTrackPeople,
+		Description: "Record detailed statistics per each developer.",
+		Flag:        "burndown-people",
+		Type:        BoolConfigurationOption,
+		Default:     false}, {
+		Name:        ConfigBurndownDebug,
+		Description: "Validate the trees on each step.",
+		Flag:        "burndown-debug",
+		Type:        BoolConfigurationOption,
+		Default:     false},
+	}
+	return options[:]
+}
+
+func (analyser *BurndownAnalysis) Configure(facts map[string]interface{}) {
+	if val, exists := facts[ConfigBurndownGranularity].(int); exists {
 		analyser.Granularity = val
 		analyser.Granularity = val
 	}
 	}
-	if val, exists := facts["Burndown.Sampling"].(int); exists {
+	if val, exists := facts[ConfigBurndownSampling].(int); exists {
 		analyser.Sampling = val
 		analyser.Sampling = val
 	}
 	}
-	if val, exists := facts["Burndown.TrackFiles"].(bool); exists {
+	if val, exists := facts[ConfigBurndownTrackFiles].(bool); exists {
 		analyser.TrackFiles = val
 		analyser.TrackFiles = val
 	}
 	}
-	if val, exists := facts["PeopleNumber"].(int); exists {
-		analyser.PeopleNumber = val
+	if people, _ := facts[ConfigBurndownTrackPeople].(bool); people {
+		if val, exists := facts[FactIdentityDetectorPeopleCount].(int); exists {
+			analyser.PeopleNumber = val
+		}
 	}
 	}
-	if val, exists := facts["Burndown.Debug"].(bool); exists {
+	if val, exists := facts[ConfigBurndownDebug].(bool); exists {
 		analyser.Debug = val
 		analyser.Debug = val
 	}
 	}
 }
 }
 
 
 func (analyser *BurndownAnalysis) Initialize(repository *git.Repository) {
 func (analyser *BurndownAnalysis) Initialize(repository *git.Repository) {
+	if analyser.Granularity <= 0 {
+		fmt.Fprintln(os.Stderr, "Warning: adjusted the granularity to 30 days")
+		analyser.Granularity = 30
+	}
+	if analyser.Sampling <= 0 {
+		fmt.Fprintln(os.Stderr, "Warning: adjusted the sampling to 30 days")
+		analyser.Sampling = 30
+	}
 	analyser.repository = repository
 	analyser.repository = repository
 	analyser.globalStatus = map[int]int64{}
 	analyser.globalStatus = map[int]int64{}
 	analyser.globalHistory = [][]int64{}
 	analyser.globalHistory = [][]int64{}
@@ -551,5 +600,5 @@ func (analyser *BurndownAnalysis) updateHistories(
 }
 }
 
 
 func init() {
 func init() {
-  Registry.Register(&BurndownAnalysis{})
+	Registry.Register(&BurndownAnalysis{})
 }
 }

+ 77 - 98
cmd/hercules/main.go

@@ -29,11 +29,9 @@ package main
 
 
 import (
 import (
 	"bytes"
 	"bytes"
-	"context"
 	"flag"
 	"flag"
 	"fmt"
 	"fmt"
 	"io"
 	"io"
-	"io/ioutil"
 	"net/http"
 	"net/http"
 	_ "net/http/pprof"
 	_ "net/http/pprof"
 	"os"
 	"os"
@@ -50,8 +48,9 @@ import (
 	"gopkg.in/src-d/hercules.v2"
 	"gopkg.in/src-d/hercules.v2"
 	"gopkg.in/src-d/hercules.v2/stdout"
 	"gopkg.in/src-d/hercules.v2/stdout"
 	"gopkg.in/src-d/hercules.v2/pb"
 	"gopkg.in/src-d/hercules.v2/pb"
+	"github.com/vbauerster/mpb"
+	"github.com/vbauerster/mpb/decor"
 	"github.com/gogo/protobuf/proto"
 	"github.com/gogo/protobuf/proto"
-	"time"
 )
 )
 
 
 func sortedKeys(m map[string][][]int64) []string {
 func sortedKeys(m map[string][][]int64) []string {
@@ -80,61 +79,7 @@ func (writer OneLineWriter) Write(p []byte) (n int, err error) {
 	return
 	return
 }
 }
 
 
-func main() {
-	var protobuf bool
-	var withFiles bool
-	var withPeople bool
-	var withCouples bool
-	var withUasts bool
-	var people_dict_path string
-	var profile bool
-	var granularity, sampling, similarityThreshold int
-	var bblfshEndpoint string
-	var bblfshTimeout int
-	var uastPoolSize int
-	var commitsFile string
-	var ignoreMissingSubmodules bool
-	var debug bool
-	flag.BoolVar(&withFiles, "files", false, "Output detailed statistics per each file.")
-	flag.BoolVar(&withPeople, "people", false, "Output detailed statistics per each developer.")
-	flag.BoolVar(&withCouples, "couples", false, "Gather the co-occurrence matrix "+
-		"for files and people.")
-	flag.BoolVar(&withUasts, "uasts", false, "Output pairs of Universal Abstract Syntax Trees for " +
-			"every changed file in each commit.")
-	flag.StringVar(&people_dict_path, "people-dict", "", "Path to the developers' email associations.")
-	flag.BoolVar(&profile, "profile", false, "Collect the profile to hercules.pprof.")
-	flag.IntVar(&granularity, "granularity", 30, "How many days there are in a single band.")
-	flag.IntVar(&sampling, "sampling", 30, "How frequently to record the state in days.")
-	flag.IntVar(&similarityThreshold, "M", 90,
-		"A threshold on the similarity index used to detect renames.")
-	flag.BoolVar(&debug, "debug", false, "Validate the trees on each step.")
-	flag.StringVar(&commitsFile, "commits", "", "Path to the text file with the "+
-		"commit history to follow instead of the default rev-list "+
-		"--first-parent. The format is the list of hashes, each hash on a "+
-		"separate line. The first hash is the root.")
-	flag.BoolVar(&ignoreMissingSubmodules, "ignore-missing-submodules", false,
-		"Do not panic on submodules which are not registered..")
-	flag.BoolVar(&protobuf, "pb", false, "The output format will be Protocol Buffers instead of YAML.")
-	flag.IntVar(&uastPoolSize, "uast-pool-size", 1, "Number of goroutines to extract UASTs.")
-	flag.StringVar(&bblfshEndpoint, "bblfsh", "0.0.0.0:9432", "Babelfish server's endpoint.")
-	flag.IntVar(&bblfshTimeout, "bblfsh-timeout", 20, "Babelfish's server timeout.")
-	flag.Parse()
-	if granularity <= 0 {
-		fmt.Fprint(os.Stderr, "Warning: adjusted the granularity to 1 day\n")
-		granularity = 1
-	}
-	if profile {
-		go http.ListenAndServe("localhost:6060", nil)
-		prof, _ := os.Create("hercules.pprof")
-		pprof.StartCPUProfile(prof)
-		defer pprof.StopCPUProfile()
-	}
-	if len(flag.Args()) == 0 || len(flag.Args()) > 3 {
-		fmt.Fprint(os.Stderr,
-			"Usage: hercules <path to repo or URL> [<disk cache path>]\n")
-		os.Exit(1)
-	}
-	uri := flag.Arg(0)
+func loadRepository(uri string) *git.Repository {
 	var repository *git.Repository
 	var repository *git.Repository
 	var backend storage.Storer
 	var backend storage.Storer
 	var err error
 	var err error
@@ -152,7 +97,7 @@ func main() {
 		} else {
 		} else {
 			backend = memory.NewStorage()
 			backend = memory.NewStorage()
 		}
 		}
-		fmt.Fprint(os.Stderr, "cloning...\r")
+		fmt.Fprint(os.Stderr, "connecting...\r")
 		repository, err = git.Clone(backend, nil, &git.CloneOptions{
 		repository, err = git.Clone(backend, nil, &git.CloneOptions{
 			URL: uri,
 			URL: uri,
 			Progress: OneLineWriter{Writer: os.Stderr},
 			Progress: OneLineWriter{Writer: os.Stderr},
@@ -167,31 +112,76 @@ func main() {
 	if err != nil {
 	if err != nil {
 		panic(err)
 		panic(err)
 	}
 	}
+	return repository
+}
+
+func main() {
+	var protobuf bool
+	var profile bool
+	var commitsFile string
+	var withBurndown bool
+	var withCouples bool
+	flag.BoolVar(&profile, "profile", false, "Collect the profile to hercules.pprof.")
+	flag.StringVar(&commitsFile, "commits", "", "Path to the text file with the "+
+		"commit history to follow instead of the default rev-list "+
+		"--first-parent. The format is the list of hashes, each hash on a "+
+		"separate line. The first hash is the root.")
+	flag.BoolVar(&protobuf, "pb", false, "The output format will be Protocol Buffers instead of YAML.")
+	flag.BoolVar(&withBurndown, "burndown", false, "Analyse lines burndown.")
+	flag.BoolVar(&withCouples, "couples", false, "Analyse file and developer couples.")
+	facts := hercules.Registry.AddFlags()
+	flag.Parse()
+
+	if profile {
+		go http.ListenAndServe("localhost:6060", nil)
+		prof, _ := os.Create("hercules.pprof")
+		pprof.StartCPUProfile(prof)
+		defer pprof.StopCPUProfile()
+	}
+	if len(flag.Args()) == 0 || len(flag.Args()) > 3 {
+		fmt.Fprint(os.Stderr,
+			"Usage: hercules <path to repo or URL> [<disk cache path>]\n")
+		os.Exit(1)
+	}
+	uri := flag.Arg(0)
+	repository := loadRepository(uri)
 
 
 	// core logic
 	// core logic
 	pipeline := hercules.NewPipeline(repository)
 	pipeline := hercules.NewPipeline(repository)
+	pipeline.SetFeaturesFromFlags()
+	progress := mpb.New(mpb.Output(os.Stderr))
+	var bar *mpb.Bar
 	pipeline.OnProgress = func(commit, length int) {
 	pipeline.OnProgress = func(commit, length int) {
-		if commit < length {
-			fmt.Fprintf(os.Stderr, "%d / %d\r", commit, length)
-		} else {
-			fmt.Fprint(os.Stderr, "finalizing...    \r")
+		if bar == nil {
+			bar = progress.AddBar(int64(length + 1),
+				mpb.PrependDecorators(decor.DynamicName(
+					func (stats *decor.Statistics) string {
+						if stats.Current < stats.Total {
+							return fmt.Sprintf("%d / %d", stats.Current, length)
+						}
+						return "finalizing"
+					}, 10, 0)),
+				mpb.AppendDecorators(decor.ETA(4, 0)),
+			)
 		}
 		}
+		bar.Incr(commit - int(bar.Current()))
 	}
 	}
-	// list of commits belonging to the default branch, from oldest to newest
-	// rev-list --first-parent
+
 	var commits []*object.Commit
 	var commits []*object.Commit
 	if commitsFile == "" {
 	if commitsFile == "" {
+		// list of commits belonging to the default branch, from oldest to newest
+		// rev-list --first-parent
 		commits = pipeline.Commits()
 		commits = pipeline.Commits()
 	} else {
 	} else {
+		var err error
 		commits, err = hercules.LoadCommitsFromFile(commitsFile, repository)
 		commits, err = hercules.LoadCommitsFromFile(commitsFile, repository)
 		if err != nil {
 		if err != nil {
 			panic(err)
 			panic(err)
 		}
 		}
 	}
 	}
+	facts["commits"] = commits
 
 
-	pipeline.AddItem(&hercules.BlobCache{
-		IgnoreMissingSubmodules: ignoreMissingSubmodules,
-	})
+	/*
 	var uastSaver *hercules.UASTChangesSaver
 	var uastSaver *hercules.UASTChangesSaver
 	if withUasts {
 	if withUasts {
 		pipeline.AddItem(&hercules.UASTExtractor{
 		pipeline.AddItem(&hercules.UASTExtractor{
@@ -207,42 +197,26 @@ func main() {
 		uastSaver = &hercules.UASTChangesSaver{}
 		uastSaver = &hercules.UASTChangesSaver{}
 		pipeline.AddItem(uastSaver)
 		pipeline.AddItem(uastSaver)
 	}
 	}
-	pipeline.AddItem(&hercules.DaysSinceStart{})
-	pipeline.AddItem(&hercules.RenameAnalysis{SimilarityThreshold: similarityThreshold})
-	pipeline.AddItem(&hercules.TreeDiff{})
-	pipeline.AddItem(&hercules.FileDiff{})
-	idMatcher := &hercules.IdentityDetector{}
-	var peopleCount int
-	if withPeople || withCouples {
-		if people_dict_path != "" {
-			idMatcher.LoadPeopleDict(people_dict_path)
-			peopleCount = len(idMatcher.ReversedPeopleDict) - 1
-		} else {
-			idMatcher.GeneratePeopleDict(commits)
-			peopleCount = len(idMatcher.ReversedPeopleDict)
-		}
-	}
-	pipeline.AddItem(idMatcher)
-	burndowner := &hercules.BurndownAnalysis{
-		Granularity:  granularity,
-		Sampling:     sampling,
-		Debug:        debug,
-		TrackFiles:   withFiles,
-		PeopleNumber: peopleCount,
+	*/
+	var burndowner hercules.PipelineItem
+	if withBurndown {
+		burndowner = pipeline.DeployItem(&hercules.BurndownAnalysis{})
 	}
 	}
-	pipeline.AddItem(burndowner)
-	var coupler *hercules.Couples
+	var coupler hercules.PipelineItem
 	if withCouples {
 	if withCouples {
-		coupler = &hercules.Couples{PeopleNumber: peopleCount}
-		pipeline.AddItem(coupler)
+		coupler = pipeline.DeployItem(&hercules.Couples{})
 	}
 	}
-	facts := map[string]interface{}{}
 	pipeline.Initialize(facts)
 	pipeline.Initialize(facts)
 	result, err := pipeline.Run(commits)
 	result, err := pipeline.Run(commits)
 	if err != nil {
 	if err != nil {
 		panic(err)
 		panic(err)
 	}
 	}
+	progress.Stop()
 	fmt.Fprint(os.Stderr, "writing...    \r")
 	fmt.Fprint(os.Stderr, "writing...    \r")
+	_ = result
+	_ = burndowner
+	_ = coupler
+	/*
 	burndownResults := result[burndowner].(hercules.BurndownResult)
 	burndownResults := result[burndowner].(hercules.BurndownResult)
 	if len(burndownResults.GlobalHistory) == 0 {
 	if len(burndownResults.GlobalHistory) == 0 {
 		return
 		return
@@ -251,6 +225,8 @@ func main() {
 	if withCouples {
 	if withCouples {
 		couplesResult = result[coupler].(hercules.CouplesResult)
 		couplesResult = result[coupler].(hercules.CouplesResult)
 	}
 	}
+	*/
+	/*
 	if withUasts {
 	if withUasts {
 		changedUasts := result[uastSaver].([][]hercules.UASTChange)
 		changedUasts := result[uastSaver].([][]hercules.UASTChange)
 		for i, changes := range changedUasts {
 		for i, changes := range changedUasts {
@@ -275,17 +251,20 @@ func main() {
 			}
 			}
 		}
 		}
 	}
 	}
+	*/
+	/*
+	reversedPeopleDict := facts[hercules.FactIdentityDetectorReversedPeopleDict].([]string)
 	begin := commits[0].Author.When.Unix()
 	begin := commits[0].Author.When.Unix()
 	end := commits[len(commits)-1].Author.When.Unix()
 	end := commits[len(commits)-1].Author.When.Unix()
 	if !protobuf {
 	if !protobuf {
 		printResults(uri, begin, end, granularity, sampling,
 		printResults(uri, begin, end, granularity, sampling,
 			withFiles, withPeople, withCouples,
 			withFiles, withPeople, withCouples,
-			burndownResults, couplesResult, idMatcher.ReversedPeopleDict)
+			burndownResults, couplesResult, reversedPeopleDict)
 	} else {
 	} else {
 		serializeResults(uri, begin, end, granularity, sampling,
 		serializeResults(uri, begin, end, granularity, sampling,
 			withFiles, withPeople, withCouples,
 			withFiles, withPeople, withCouples,
-			burndownResults, couplesResult, idMatcher.ReversedPeopleDict)
-	}
+			burndownResults, couplesResult, reversedPeopleDict)
+	}*/
 }
 }
 
 
 func printResults(
 func printResults(

+ 7 - 3
couples.go

@@ -40,8 +40,12 @@ func (couples *Couples) Requires() []string {
 	return arr[:]
 	return arr[:]
 }
 }
 
 
-func (couples *Couples) Construct(facts map[string]interface{}) {
-	if val, exists := facts["PeopleNumber"].(int); exists {
+func (couples *Couples) ListConfigurationOptions() []ConfigurationOption {
+	return []ConfigurationOption{}
+}
+
+func (couples *Couples) Configure(facts map[string]interface{}) {
+	if val, exists := facts[FactIdentityDetectorPeopleCount].(int); exists {
 		couples.PeopleNumber = val
 		couples.PeopleNumber = val
 	}
 	}
 }
 }
@@ -169,5 +173,5 @@ func (couples *Couples) Finalize() interface{} {
 }
 }
 
 
 func init() {
 func init() {
-  Registry.Register(&Couples{})
+	Registry.Register(&Couples{})
 }
 }

+ 6 - 6
day.go

@@ -25,7 +25,11 @@ func (days *DaysSinceStart) Requires() []string {
 	return []string{}
 	return []string{}
 }
 }
 
 
-func (days *DaysSinceStart) Construct(facts map[string]interface{}) {}
+func (days *DaysSinceStart) ListConfigurationOptions() []ConfigurationOption {
+	return []ConfigurationOption{}
+}
+
+func (days *DaysSinceStart) Configure(facts map[string]interface{}) {}
 
 
 func (days *DaysSinceStart) Initialize(repository *git.Repository) {
 func (days *DaysSinceStart) Initialize(repository *git.Repository) {
 	days.day0 = time.Time{}
 	days.day0 = time.Time{}
@@ -48,10 +52,6 @@ func (days *DaysSinceStart) Consume(deps map[string]interface{}) (map[string]int
 	return map[string]interface{}{"day": day}, nil
 	return map[string]interface{}{"day": day}, nil
 }
 }
 
 
-func (days *DaysSinceStart) Finalize() interface{} {
-	return nil
-}
-
 func init() {
 func init() {
-  Registry.Register(&DaysSinceStart{})
+	Registry.Register(&DaysSinceStart{})
 }
 }

+ 0 - 6
day_test.go

@@ -21,12 +21,6 @@ func TestDaysSinceStartMeta(t *testing.T) {
 	assert.Equal(t, len(dss.Requires()), 0)
 	assert.Equal(t, len(dss.Requires()), 0)
 }
 }
 
 
-func TestDaysSinceStartFinalize(t *testing.T) {
-	dss := fixtureDaysSinceStart()
-	r := dss.Finalize()
-	assert.Nil(t, r)
-}
-
 func TestDaysSinceStartConsume(t *testing.T) {
 func TestDaysSinceStartConsume(t *testing.T) {
 	dss := fixtureDaysSinceStart()
 	dss := fixtureDaysSinceStart()
 	deps := map[string]interface{}{}
 	deps := map[string]interface{}{}

+ 6 - 6
diff.go

@@ -35,7 +35,11 @@ func (diff *FileDiff) Requires() []string {
 	return arr[:]
 	return arr[:]
 }
 }
 
 
-func (diff *FileDiff) Construct(facts map[string]interface{}) {}
+func (diff *FileDiff) ListConfigurationOptions() []ConfigurationOption {
+	return []ConfigurationOption{}
+}
+
+func (diff *FileDiff) Configure(facts map[string]interface{}) {}
 
 
 func (diff *FileDiff) Initialize(repository *git.Repository) {}
 func (diff *FileDiff) Initialize(repository *git.Repository) {}
 
 
@@ -77,10 +81,6 @@ func (diff *FileDiff) Consume(deps map[string]interface{}) (map[string]interface
 	return map[string]interface{}{"file_diff": result}, nil
 	return map[string]interface{}{"file_diff": result}, nil
 }
 }
 
 
-func (diff *FileDiff) Finalize() interface{} {
-	return nil
-}
-
 func blobToString(file *object.Blob) (string, error) {
 func blobToString(file *object.Blob) (string, error) {
 	if file == nil {
 	if file == nil {
 		return "", errors.New("Blob not cached.")
 		return "", errors.New("Blob not cached.")
@@ -96,5 +96,5 @@ func blobToString(file *object.Blob) (string, error) {
 }
 }
 
 
 func init() {
 func init() {
-  Registry.Register(&FileDiff{})
+	Registry.Register(&FileDiff{})
 }
 }

+ 8 - 8
diff_refiner.go

@@ -26,7 +26,11 @@ func (ref *FileDiffRefiner) Features() []string {
 	return arr[:]
 	return arr[:]
 }
 }
 
 
-func (ref *FileDiffRefiner) Construct(facts map[string]interface{}) {}
+func (ref *FileDiffRefiner) ListConfigurationOptions() []ConfigurationOption {
+	return []ConfigurationOption{}
+}
+
+func (ref *FileDiffRefiner) Configure(facts map[string]interface{}) {}
 
 
 func (ref *FileDiffRefiner) Initialize(repository *git.Repository) {
 func (ref *FileDiffRefiner) Initialize(repository *git.Repository) {
 }
 }
@@ -40,18 +44,14 @@ func (ref *FileDiffRefiner) Consume(deps map[string]interface{}) (map[string]int
 		}
 		}
 	}
 	}
 	diffs := deps["file_diff"].(map[string]FileDiffData)
 	diffs := deps["file_diff"].(map[string]FileDiffData)
-	for fileName, _/*diff*/ := range diffs {
-		_/*change*/ = changes[fileName]
+	for fileName, _ /*diff*/ := range diffs {
+		_ /*change*/ = changes[fileName]
 		// TODO: scan diff line by line
 		// TODO: scan diff line by line
 	}
 	}
 	result := map[string]FileDiffData{}
 	result := map[string]FileDiffData{}
 	return map[string]interface{}{"file_diff": result}, nil
 	return map[string]interface{}{"file_diff": result}, nil
 }
 }
 
 
-func (ref *FileDiffRefiner) Finalize() interface{} {
-	return nil
-}
-
 func init() {
 func init() {
-  Registry.Register(&FileDiffRefiner{})
+	Registry.Register(&FileDiffRefiner{})
 }
 }

+ 0 - 6
diff_test.go

@@ -26,12 +26,6 @@ func TestFileDiffMeta(t *testing.T) {
 	assert.Equal(t, fd.Requires()[1], "blob_cache")
 	assert.Equal(t, fd.Requires()[1], "blob_cache")
 }
 }
 
 
-func TestFileDiffFinalize(t *testing.T) {
-	fd := fixtureFileDiff()
-	r := fd.Finalize()
-	assert.Nil(t, r)
-}
-
 func TestFileDiffConsume(t *testing.T) {
 func TestFileDiffConsume(t *testing.T) {
 	fd := fixtureFileDiff()
 	fd := fixtureFileDiff()
 	deps := map[string]interface{}{}
 	deps := map[string]interface{}{}

+ 30 - 14
identity.go

@@ -17,8 +17,15 @@ type IdentityDetector struct {
 	ReversedPeopleDict []string
 	ReversedPeopleDict []string
 }
 }
 
 
-const MISSING_AUTHOR = (1 << 18) - 1
-const SELF_AUTHOR = (1 << 18) - 2
+const (
+	MISSING_AUTHOR = (1 << 18) - 1
+	SELF_AUTHOR    = (1 << 18) - 2
+
+	FactIdentityDetectorPeopleDict         = "IdentityDetector.PeopleDict"
+	FactIdentityDetectorReversedPeopleDict = "IdentityDetector.ReversedPeopleDict"
+	ConfigIdentityDetectorPeopleDictPath   = "IdentityDetector.PeopleDictPath"
+	FactIdentityDetectorPeopleCount        = "IdentityDetector.PeopleCount"
+)
 
 
 func (id *IdentityDetector) Name() string {
 func (id *IdentityDetector) Name() string {
 	return "IdentityDetector"
 	return "IdentityDetector"
@@ -33,28 +40,41 @@ func (id *IdentityDetector) Requires() []string {
 	return []string{}
 	return []string{}
 }
 }
 
 
-func (id *IdentityDetector) Construct(facts map[string]interface{}) {
-	if val, exists := facts["IdentityDetector.PeopleDict"].(map[string]int); exists {
+func (id *IdentityDetector) ListConfigurationOptions() []ConfigurationOption {
+	options := [...]ConfigurationOption{{
+		Name:        ConfigIdentityDetectorPeopleDictPath,
+		Description: "Path to the developers' email associations.",
+		Flag:        "people-dict",
+		Type:        StringConfigurationOption,
+		Default:     ""},
+	}
+	return options[:]
+}
+
+func (id *IdentityDetector) Configure(facts map[string]interface{}) {
+	if val, exists := facts[FactIdentityDetectorPeopleDict].(map[string]int); exists {
 		id.PeopleDict = val
 		id.PeopleDict = val
 	}
 	}
-	if val, exists := facts["IdentityDetector.ReversedPeopleDict"].([]string); exists {
+	if val, exists := facts[FactIdentityDetectorReversedPeopleDict].([]string); exists {
 		id.ReversedPeopleDict = val
 		id.ReversedPeopleDict = val
 	}
 	}
 	if id.PeopleDict == nil {
 	if id.PeopleDict == nil {
 		if id.ReversedPeopleDict != nil {
 		if id.ReversedPeopleDict != nil {
 			panic("IdentityDetector: ReversedPeopleDict != nil while PeopleDict == nil")
 			panic("IdentityDetector: ReversedPeopleDict != nil while PeopleDict == nil")
 		}
 		}
-		peopleDictPath := facts["IdentityDetector.PeopleDictPath"].(string)
+		peopleDictPath, _ := facts[ConfigIdentityDetectorPeopleDictPath].(string)
 		if peopleDictPath != "" {
 		if peopleDictPath != "" {
 			id.LoadPeopleDict(peopleDictPath)
 			id.LoadPeopleDict(peopleDictPath)
-			facts["PeopleCount"] = len(id.ReversedPeopleDict) - 1
+			facts[FactIdentityDetectorPeopleCount] = len(id.ReversedPeopleDict) - 1
 		} else {
 		} else {
 			id.GeneratePeopleDict(facts["commits"].([]*object.Commit))
 			id.GeneratePeopleDict(facts["commits"].([]*object.Commit))
-			facts["PeopleCount"] = len(id.ReversedPeopleDict)
+			facts[FactIdentityDetectorPeopleCount] = len(id.ReversedPeopleDict)
 		}
 		}
 	} else {
 	} else {
-		facts["PeopleCount"] = len(id.ReversedPeopleDict)
+		facts[FactIdentityDetectorPeopleCount] = len(id.ReversedPeopleDict)
 	}
 	}
+	facts[FactIdentityDetectorPeopleDict] = id.PeopleDict
+	facts[FactIdentityDetectorReversedPeopleDict] = id.ReversedPeopleDict
 }
 }
 
 
 func (id *IdentityDetector) Initialize(repository *git.Repository) {
 func (id *IdentityDetector) Initialize(repository *git.Repository) {
@@ -73,10 +93,6 @@ func (self *IdentityDetector) Consume(deps map[string]interface{}) (map[string]i
 	return map[string]interface{}{"author": id}, nil
 	return map[string]interface{}{"author": id}, nil
 }
 }
 
 
-func (id *IdentityDetector) Finalize() interface{} {
-	return nil
-}
-
 func (id *IdentityDetector) LoadPeopleDict(path string) error {
 func (id *IdentityDetector) LoadPeopleDict(path string) error {
 	file, err := os.Open(path)
 	file, err := os.Open(path)
 	if err != nil {
 	if err != nil {
@@ -197,5 +213,5 @@ func (id *IdentityDetector) GeneratePeopleDict(commits []*object.Commit) {
 }
 }
 
 
 func init() {
 func init() {
-  Registry.Register(&IdentityDetector{})
+	Registry.Register(&IdentityDetector{})
 }
 }

+ 0 - 6
identity_test.go

@@ -141,12 +141,6 @@ func TestGeneratePeopleDict(t *testing.T) {
 	assert.Equal(t, id.ReversedPeopleDict[2], "máximo cuadros|mcuadros@gmail.com")
 	assert.Equal(t, id.ReversedPeopleDict[2], "máximo cuadros|mcuadros@gmail.com")
 }
 }
 
 
-func TestIdentityDetectorFinalize(t *testing.T) {
-	id := fixtureIdentityDetector()
-	res := id.Finalize()
-	assert.Nil(t, res)
-}
-
 func TestLoadPeopleDictInvalidPath(t *testing.T) {
 func TestLoadPeopleDictInvalidPath(t *testing.T) {
 	id := fixtureIdentityDetector()
 	id := fixtureIdentityDetector()
 	ipath := "/xxxyyyzzzInvalidPath!hehe"
 	ipath := "/xxxyyyzzzInvalidPath!hehe"

+ 153 - 38
pipeline.go

@@ -3,11 +3,14 @@ package hercules
 import (
 import (
 	"bufio"
 	"bufio"
 	"errors"
 	"errors"
+	"flag"
 	"fmt"
 	"fmt"
 	"io"
 	"io"
 	"io/ioutil"
 	"io/ioutil"
 	"os"
 	"os"
 	"reflect"
 	"reflect"
+	"strings"
+	"unsafe"
 
 
 	"gopkg.in/src-d/go-git.v4"
 	"gopkg.in/src-d/go-git.v4"
 	"gopkg.in/src-d/go-git.v4/plumbing"
 	"gopkg.in/src-d/go-git.v4/plumbing"
@@ -15,6 +18,32 @@ import (
 	"gopkg.in/src-d/hercules.v2/toposort"
 	"gopkg.in/src-d/hercules.v2/toposort"
 )
 )
 
 
+type ConfigurationOptionType int
+
+const (
+	// Boolean value type.
+	BoolConfigurationOption ConfigurationOptionType = iota
+	// Integer value type.
+	IntConfigurationOption
+	// String value type.
+	StringConfigurationOption
+)
+
+// ConfigurationOption allows for the unified, retrospective way to setup PipelineItem-s.
+type ConfigurationOption struct {
+	// Name identifies the configuration option in facts.
+	Name string
+	// Description represents the help text about the configuration option.
+	Description string
+	// Flag corresponds to the CLI token with "-" prepended.
+	Flag string
+	// Type specifies the kind of the configuration option's value.
+	Type ConfigurationOptionType
+	// Default is the initial value of the configuration option.
+	Default interface{}
+}
+
+// PipelineItem is the interface for all the units of the Git commit analysis pipeline.
 type PipelineItem interface {
 type PipelineItem interface {
 	// Name returns the name of the analysis.
 	// Name returns the name of the analysis.
 	Name() string
 	Name() string
@@ -23,9 +52,11 @@ type PipelineItem interface {
 	Provides() []string
 	Provides() []string
 	// Requires returns the list of keys of needed entities which must be supplied in Consume().
 	// Requires returns the list of keys of needed entities which must be supplied in Consume().
 	Requires() []string
 	Requires() []string
-	// Construct performs the initial creation of the object by taking parameters from facts.
+	// ListConfigurationOptions returns the list of available options which can be consumed by Configure().
+	ListConfigurationOptions() []ConfigurationOption
+	// Configure performs the initial setup of the object by applying parameters from facts.
 	// It allows to create PipelineItems in a universal way.
 	// It allows to create PipelineItems in a universal way.
-	Construct(facts map[string]interface{})
+	Configure(facts map[string]interface{})
 	// Initialize prepares and resets the item. Consume() requires Initialize()
 	// Initialize prepares and resets the item. Consume() requires Initialize()
 	// to be called at least once beforehand.
 	// to be called at least once beforehand.
 	Initialize(*git.Repository)
 	Initialize(*git.Repository)
@@ -34,10 +65,9 @@ type PipelineItem interface {
 	// "commit" and "index".
 	// "commit" and "index".
 	// Returns the calculated entities which match Provides().
 	// Returns the calculated entities which match Provides().
 	Consume(deps map[string]interface{}) (map[string]interface{}, error)
 	Consume(deps map[string]interface{}) (map[string]interface{}, error)
-	// Finalize returns the result of the analysis.
-	Finalize() interface{}
 }
 }
 
 
+// FeaturedPipelineItem enables switching the automatic insertion of pipeline items on or off.
 type FeaturedPipelineItem interface {
 type FeaturedPipelineItem interface {
 	PipelineItem
 	PipelineItem
 	// Features returns the list of names which enable this item to be automatically inserted
 	// Features returns the list of names which enable this item to be automatically inserted
@@ -45,15 +75,21 @@ type FeaturedPipelineItem interface {
 	Features() []string
 	Features() []string
 }
 }
 
 
+// FinalizedPipelineItem corresponds to the top level pipeline items which produce the end results.
+// Finalize() is not a part of PipelineItem, so the items which only feed the other items
+// do not have to implement it.
+type FinalizedPipelineItem interface {
+	PipelineItem
+	// Finalize returns the result of the analysis.
+	Finalize() interface{}
+}
+
 type PipelineItemRegistry struct {
 type PipelineItemRegistry struct {
-	provided map[string][]reflect.Type
+	provided   map[string][]reflect.Type
+	registered map[string]reflect.Type
 }
 }
 
 
 func (registry *PipelineItemRegistry) Register(example PipelineItem) {
 func (registry *PipelineItemRegistry) Register(example PipelineItem) {
-	if registry.provided == nil {
-		registry.provided = map[string][]reflect.Type{}
-	}
 	t := reflect.TypeOf(example)
 	t := reflect.TypeOf(example)
+	registry.registered[example.Name()] = t
 	for _, dep := range example.Provides() {
 	for _, dep := range example.Provides() {
 		ts := registry.provided[dep]
 		ts := registry.provided[dep]
 		if ts == nil {
 		if ts == nil {
@@ -76,10 +112,76 @@ func (registry *PipelineItemRegistry) Summon(provides string) []PipelineItem {
 	return items
 	return items
 }
 }
 
 
-var Registry = &PipelineItemRegistry{}
+// arrayFeatureFlags collects the values of a command line flag which may be repeated.
+// Together with its String() and Set() methods it is what gets passed to flag.Var().
+type arrayFeatureFlags struct {
+	// Flags contains the features activated through the command line.
+	Flags []string
+	// Choices contains all registered features.
+	Choices map[string]bool
+}
+
+// String renders the activated features as a single comma separated line.
+func (acf *arrayFeatureFlags) String() string {
+	activated := acf.Flags
+	return strings.Join(activated, ", ")
+}
+
+// Set records one more activated feature. The value must be present in Choices,
+// otherwise an error is returned and Flags is left untouched.
+func (acf *arrayFeatureFlags) Set(value string) error {
+	_, registered := acf.Choices[value]
+	if !registered {
+		message := fmt.Sprintf("Feature \"%s\" is not registered.", value)
+		return errors.New(message)
+	}
+	acf.Flags = append(acf.Flags, value)
+	return nil
+}
+
+// featureFlags accumulates the values of the "-feature" command line flag. It is registered
+// in PipelineItemRegistry.AddFlags() and read by Pipeline.SetFeaturesFromFlags().
+var featureFlags = arrayFeatureFlags{Flags: []string{}, Choices: map[string]bool{}}
+
+// AddFlags defines a command line flag for every configuration option of every registered
+// pipeline item, plus the repeatable "-feature" flag. It returns the map from the option
+// names to the corresponding values.
+// NOTE(review): the returned values presumably become meaningful only after flag.Parse()
+// has been called - confirm against the caller.
+func (registry *PipelineItemRegistry) AddFlags() map[string]interface{} {
+	flags := map[string]interface{}{}
+	for name, it := range registry.registered {
+		// The help text of each flag is suffixed with the name of the owning item.
+		formatHelp := func(desc string) string {
+			return fmt.Sprintf("%s [%s]", desc, name)
+		}
+		// Registered types are pointers to structs; create a fresh zero instance to query it.
+		itemIface := reflect.New(it.Elem()).Interface()
+		for _, opt := range itemIface.(PipelineItem).ListConfigurationOptions() {
+			var iface interface{}
+			// In every case below iface is first boxed as a value of the option's type, then
+			// the word located unsafe.Sizeof(&iface) bytes past its start is overwritten with
+			// the pointer returned by the flag package.
+			// NOTE(review): this appears to rely on the in-memory layout of interface{}
+			// (type word followed by the data pointer) so that reading iface yields the
+			// current value of the flag - confirm; it is not guaranteed by the language.
+			// An option with any other Type leaves iface nil.
+			switch opt.Type {
+			case BoolConfigurationOption:
+				iface = interface{}(true)
+				ptr := (**bool)(unsafe.Pointer(uintptr(unsafe.Pointer(&iface)) + unsafe.Sizeof(&iface)))
+				*ptr = flag.Bool(opt.Flag, opt.Default.(bool), formatHelp(opt.Description))
+			case IntConfigurationOption:
+				iface = interface{}(0)
+				ptr := (**int)(unsafe.Pointer(uintptr(unsafe.Pointer(&iface)) + unsafe.Sizeof(&iface)))
+				*ptr = flag.Int(opt.Flag, opt.Default.(int), formatHelp(opt.Description))
+			case StringConfigurationOption:
+				iface = interface{}("")
+				ptr := (**string)(unsafe.Pointer(uintptr(unsafe.Pointer(&iface)) + unsafe.Sizeof(&iface)))
+				*ptr = flag.String(opt.Flag, opt.Default.(string), formatHelp(opt.Description))
+			}
+			flags[opt.Name] = iface
+		}
+		// Remember the features of the item so that arrayFeatureFlags.Set() can validate
+		// the values of "-feature".
+		if fpi, ok := itemIface.(FeaturedPipelineItem); ok {
+			for _, f := range fpi.Features() {
+				featureFlags.Choices[f] = true
+			}
+		}
+	}
+	// Choices is a map, so the order of the features in the help text is not stable.
+	features := []string{}
+	for f := range featureFlags.Choices {
+		features = append(features, f)
+	}
+	flag.Var(&featureFlags, "feature",
+		fmt.Sprintf("Enables specific analysis features, can be specified "+
+			"multiple times. Available features: [%s].", strings.Join(features, ", ")))
+	return flags
+}
+
+// Registry contains all known pipeline item types.
+// The maps are created right here because Register() stores into them
+// without checking for nil.
+var Registry = &PipelineItemRegistry{
+	provided:   map[string][]reflect.Type{},
+	registered: map[string]reflect.Type{},
+}
 
 
 type wrappedPipelineItem struct {
 type wrappedPipelineItem struct {
-	Item PipelineItem
+	Item     PipelineItem
 	Children []wrappedPipelineItem
 	Children []wrappedPipelineItem
 }
 }
 
 
@@ -106,9 +208,9 @@ type Pipeline struct {
 func NewPipeline(repository *git.Repository) *Pipeline {
 func NewPipeline(repository *git.Repository) *Pipeline {
 	return &Pipeline{
 	return &Pipeline{
 		repository: repository,
 		repository: repository,
-		items: []PipelineItem{},
-		facts: map[string]interface{}{},
-		features: map[string]bool{},
+		items:      []PipelineItem{},
+		facts:      map[string]interface{}{},
+		features:   map[string]bool{},
 	}
 	}
 }
 }
 
 
@@ -128,6 +230,12 @@ func (pipeline *Pipeline) SetFeature(name string) {
 	pipeline.features[name] = true
 	pipeline.features[name] = true
 }
 }
 
 
+// SetFeaturesFromFlags activates every feature which was collected from the command line
+// through the "-feature" flag.
+func (pipeline *Pipeline) SetFeaturesFromFlags() {
+	activated := featureFlags.Flags
+	for i := 0; i < len(activated); i++ {
+		pipeline.SetFeature(activated[i])
+	}
+}
+
 func (pipeline *Pipeline) DeployItem(item PipelineItem) PipelineItem {
 func (pipeline *Pipeline) DeployItem(item PipelineItem) PipelineItem {
 	queue := []PipelineItem{}
 	queue := []PipelineItem{}
 	queue = append(queue, item)
 	queue = append(queue, item)
@@ -138,27 +246,27 @@ func (pipeline *Pipeline) DeployItem(item PipelineItem) PipelineItem {
 		head := queue[0]
 		head := queue[0]
 		queue = queue[1:]
 		queue = queue[1:]
 		for _, dep := range head.Requires() {
 		for _, dep := range head.Requires() {
-		  for _, sibling := range Registry.Summon(dep) {
-			  if _, exists := added[sibling.Name()]; !exists {
-				  disabled := false
-				  // If this item supports features, check them against the activated in pipeline.features
-				  if fpi, matches := interface{}(sibling).(FeaturedPipelineItem); matches {
-					  for _, feature := range fpi.Features() {
-						  if !pipeline.features[feature] {
-							  disabled = true
-							  break
-						  }
-					  }
-				  }
-				  if disabled {
-					  continue
-				  }
-				  added[sibling.Name()] = sibling
-				  queue = append(queue, sibling)
-				  pipeline.AddItem(sibling)
-			  }
-		  }
-	  }
+			for _, sibling := range Registry.Summon(dep) {
+				if _, exists := added[sibling.Name()]; !exists {
+					disabled := false
+					// If this item supports features, check them against the activated in pipeline.features
+					if fpi, matches := interface{}(sibling).(FeaturedPipelineItem); matches {
+						for _, feature := range fpi.Features() {
+							if !pipeline.features[feature] {
+								disabled = true
+								break
+							}
+						}
+					}
+					if disabled {
+						continue
+					}
+					added[sibling.Name()] = sibling
+					queue = append(queue, sibling)
+					pipeline.AddItem(sibling)
+				}
+			}
+		}
 	}
 	}
 	return item
 	return item
 }
 }
@@ -177,6 +285,10 @@ func (pipeline *Pipeline) RemoveItem(item PipelineItem) {
 	}
 	}
 }
 }
 
 
+// Len returns the number of items which are currently in the pipeline.
+func (pipeline *Pipeline) Len() int {
+	count := len(pipeline.items)
+	return count
+}
+
 // Commits returns the critical path in the repository's history. It starts
 // Commits returns the critical path in the repository's history. It starts
 // from HEAD and traces commits backwards till the root. When it encounters
 // from HEAD and traces commits backwards till the root. When it encounters
 // a merge (more than one parent), it always chooses the first parent.
 // a merge (more than one parent), it always chooses the first parent.
@@ -310,12 +422,13 @@ func (pipeline *Pipeline) resolve(dumpPath string) {
 }
 }
 
 
 func (pipeline *Pipeline) Initialize(facts map[string]interface{}) {
 func (pipeline *Pipeline) Initialize(facts map[string]interface{}) {
-	pipeline.resolve(facts["Pipeline.DumpPath"].(string))
-	if facts["Pipeline.DryRun"].(bool) {
+	dumpPath, _ := facts["Pipeline.DumpPath"].(string)
+	pipeline.resolve(dumpPath)
+	if dryRun, _ := facts["Pipeline.DryRun"].(bool); dryRun {
 		return
 		return
 	}
 	}
 	for _, item := range pipeline.items {
 	for _, item := range pipeline.items {
-		item.Construct(facts)
+		item.Configure(facts)
 	}
 	}
 	for _, item := range pipeline.items {
 	for _, item := range pipeline.items {
 		item.Initialize(pipeline.repository)
 		item.Initialize(pipeline.repository)
@@ -354,7 +467,9 @@ func (pipeline *Pipeline) Run(commits []*object.Commit) (map[PipelineItem]interf
 	onProgress(len(commits), len(commits))
 	onProgress(len(commits), len(commits))
 	result := map[PipelineItem]interface{}{}
 	result := map[PipelineItem]interface{}{}
 	for _, item := range pipeline.items {
 	for _, item := range pipeline.items {
-		result[item] = item.Finalize()
+		if fpi, ok := interface{}(item).(FinalizedPipelineItem); ok {
+			result[item] = fpi.Finalize()
+		}
 	}
 	}
 	return result, nil
 	return result, nil
 }
 }

+ 2 - 2
pipeline_test.go

@@ -36,7 +36,7 @@ func (item *testPipelineItem) Requires() []string {
 	return []string{}
 	return []string{}
 }
 }
 
 
-func (item *testPipelineItem) Construct(facts map[string]interface{}) {
+func (item *testPipelineItem) Configure(facts map[string]interface{}) {
 }
 }
 
 
 func (item *testPipelineItem) Initialize(repository *git.Repository) {
 func (item *testPipelineItem) Initialize(repository *git.Repository) {
@@ -84,7 +84,7 @@ func (item *dependingTestPipelineItem) Requires() []string {
 	return arr[:]
 	return arr[:]
 }
 }
 
 
-func (item *dependingTestPipelineItem) Construct(facts map[string]interface{}) {
+func (item *dependingTestPipelineItem) Configure(facts map[string]interface{}) {
 }
 }
 
 
 func (item *dependingTestPipelineItem) Initialize(repository *git.Repository) {
 func (item *dependingTestPipelineItem) Initialize(repository *git.Repository) {

+ 22 - 8
renames.go

@@ -1,6 +1,8 @@
 package hercules
 package hercules
 
 
 import (
 import (
+	"fmt"
+	"os"
 	"sort"
 	"sort"
 	"unicode/utf8"
 	"unicode/utf8"
 
 
@@ -20,6 +22,10 @@ type RenameAnalysis struct {
 	repository *git.Repository
 	repository *git.Repository
 }
 }
 
 
+const (
+	// ConfigRenameAnalysisSimilarityThreshold is the key in facts from which
+	// RenameAnalysis.Configure() reads RenameAnalysis.SimilarityThreshold.
+	ConfigRenameAnalysisSimilarityThreshold = "RenameAnalysis.SimilarityThreshold"
+)
+
 func (ra *RenameAnalysis) Name() string {
 func (ra *RenameAnalysis) Name() string {
 	return "RenameAnalysis"
 	return "RenameAnalysis"
 }
 }
@@ -34,15 +40,27 @@ func (ra *RenameAnalysis) Requires() []string {
 	return arr[:]
 	return arr[:]
 }
 }
 
 
-func (ra *RenameAnalysis) Construct(facts map[string]interface{}) {
-	if val, exists := facts["RenameAnalysis.SimilarityThreshold"].(int); exists {
+// ListConfigurationOptions returns the only option of RenameAnalysis: the similarity
+// threshold, exposed as "-M". The option is named with the same key which Configure()
+// reads from facts, otherwise the value of the flag would never reach the item.
+func (ra *RenameAnalysis) ListConfigurationOptions() []ConfigurationOption {
+	options := [...]ConfigurationOption{{
+		Name:        ConfigRenameAnalysisSimilarityThreshold,
+		Description: "The threshold on the similarity index used to detect renames.",
+		Flag:        "M",
+		Type:        IntConfigurationOption,
+		Default:     90},
+	}
+	return options[:]
+}
+
+func (ra *RenameAnalysis) Configure(facts map[string]interface{}) {
+	if val, exists := facts[ConfigRenameAnalysisSimilarityThreshold].(int); exists {
 		ra.SimilarityThreshold = val
 		ra.SimilarityThreshold = val
 	}
 	}
 }
 }
 
 
 func (ra *RenameAnalysis) Initialize(repository *git.Repository) {
 func (ra *RenameAnalysis) Initialize(repository *git.Repository) {
 	if ra.SimilarityThreshold < 0 || ra.SimilarityThreshold > 100 {
 	if ra.SimilarityThreshold < 0 || ra.SimilarityThreshold > 100 {
-		panic("hercules.RenameAnalysis: an invalid SimilarityThreshold was specified")
+		fmt.Fprintln(os.Stderr, "Warning: adjusted the similarity threshold to 90")
+		ra.SimilarityThreshold = 90
 	}
 	}
 	ra.repository = repository
 	ra.repository = repository
 }
 }
@@ -158,10 +176,6 @@ func (ra *RenameAnalysis) Consume(deps map[string]interface{}) (map[string]inter
 	return map[string]interface{}{"changes": reduced_changes}, nil
 	return map[string]interface{}{"changes": reduced_changes}, nil
 }
 }
 
 
-func (ra *RenameAnalysis) Finalize() interface{} {
-	return nil
-}
-
 func (ra *RenameAnalysis) sizesAreClose(size1 int64, size2 int64) bool {
 func (ra *RenameAnalysis) sizesAreClose(size1 int64, size2 int64) bool {
 	return abs64(size1-size2)*100/max64(1, min64(size1, size2)) <=
 	return abs64(size1-size2)*100/max64(1, min64(size1, size2)) <=
 		int64(100-ra.SimilarityThreshold)
 		int64(100-ra.SimilarityThreshold)
@@ -241,5 +255,5 @@ func (slice sortableBlobs) Swap(i, j int) {
 }
 }
 
 
 func init() {
 func init() {
-  Registry.Register(&RenameAnalysis{})
+	Registry.Register(&RenameAnalysis{})
 }
 }

+ 0 - 6
renames_test.go

@@ -34,12 +34,6 @@ func TestRenameAnalysisInitializeInvalidThreshold(t *testing.T) {
 	ra.Initialize(testRepository)
 	ra.Initialize(testRepository)
 }
 }
 
 
-func TestRenameAnalysisFinalize(t *testing.T) {
-	ra := fixtureRenameAnalysis()
-	r := ra.Finalize()
-	assert.Nil(t, r)
-}
-
 func TestRenameAnalysisConsume(t *testing.T) {
 func TestRenameAnalysisConsume(t *testing.T) {
 	ra := fixtureRenameAnalysis()
 	ra := fixtureRenameAnalysis()
 	changes := make(object.Changes, 3)
 	changes := make(object.Changes, 3)

+ 6 - 6
tree_diff.go

@@ -24,7 +24,11 @@ func (treediff *TreeDiff) Requires() []string {
 	return []string{}
 	return []string{}
 }
 }
 
 
-func (treediff *TreeDiff) Construct(facts map[string]interface{}) {}
+// ListConfigurationOptions returns an empty list: TreeDiff has nothing to configure.
+func (treediff *TreeDiff) ListConfigurationOptions() []ConfigurationOption {
+	none := []ConfigurationOption{}
+	return none
+}
+
+// Configure does nothing: TreeDiff does not read anything from facts.
+func (treediff *TreeDiff) Configure(facts map[string]interface{}) {
+}
 
 
 func (treediff *TreeDiff) Initialize(repository *git.Repository) {
 func (treediff *TreeDiff) Initialize(repository *git.Repository) {
 	treediff.previousTree = nil
 	treediff.previousTree = nil
@@ -69,10 +73,6 @@ func (treediff *TreeDiff) Consume(deps map[string]interface{}) (map[string]inter
 	return map[string]interface{}{"changes": diff}, nil
 	return map[string]interface{}{"changes": diff}, nil
 }
 }
 
 
-func (treediff *TreeDiff) Finalize() interface{} {
-	return nil
-}
-
 func init() {
 func init() {
-  Registry.Register(&TreeDiff{})
+	Registry.Register(&TreeDiff{})
 }
 }

+ 0 - 6
tree_diff_test.go

@@ -81,9 +81,3 @@ func TestTreeDiffConsumeFirst(t *testing.T) {
 		assert.Equal(t, action, merkletrie.Insert)
 		assert.Equal(t, action, merkletrie.Insert)
 	}
 	}
 }
 }
-
-func TestTreeDiffFinalize(t *testing.T) {
-	id := fixtureTreeDiff()
-	res := id.Finalize()
-	assert.Nil(t, res)
-}

+ 104 - 41
uast.go

@@ -1,6 +1,7 @@
 package hercules
 package hercules
 
 
 import (
 import (
+	"bytes"
 	"context"
 	"context"
 	"errors"
 	"errors"
 	"fmt"
 	"fmt"
@@ -8,28 +9,42 @@ import (
 	"runtime"
 	"runtime"
 	"strings"
 	"strings"
 	"sync"
 	"sync"
+	"time"
 
 
 	"github.com/jeffail/tunny"
 	"github.com/jeffail/tunny"
 	"gopkg.in/bblfsh/client-go.v1"
 	"gopkg.in/bblfsh/client-go.v1"
 	"gopkg.in/bblfsh/sdk.v1/protocol"
 	"gopkg.in/bblfsh/sdk.v1/protocol"
 	"gopkg.in/bblfsh/sdk.v1/uast"
 	"gopkg.in/bblfsh/sdk.v1/uast"
+	"gopkg.in/src-d/enry.v1"
 	"gopkg.in/src-d/go-git.v4"
 	"gopkg.in/src-d/go-git.v4"
 	"gopkg.in/src-d/go-git.v4/plumbing"
 	"gopkg.in/src-d/go-git.v4/plumbing"
 	"gopkg.in/src-d/go-git.v4/plumbing/object"
 	"gopkg.in/src-d/go-git.v4/plumbing/object"
+	"gopkg.in/src-d/go-git.v4/utils/ioutil"
 	"gopkg.in/src-d/go-git.v4/utils/merkletrie"
 	"gopkg.in/src-d/go-git.v4/utils/merkletrie"
 )
 )
 
 
 type UASTExtractor struct {
 type UASTExtractor struct {
-	Endpoint string
-	Context  func() context.Context
-	PoolSize int
-	Extensions map[string]bool
-	FailOnErrors bool
+	Endpoint       string
+	Context        func() context.Context
+	PoolSize       int
+	Languages      map[string]bool
+	FailOnErrors   bool
+	ProcessedFiles map[string]int
 
 
 	clients []*bblfsh.BblfshClient
 	clients []*bblfsh.BblfshClient
-	pool   *tunny.WorkPool
+	pool    *tunny.WorkPool
 }
 }
 
 
+const (
+	// UAST_EXTRACTION_SKIPPED is stored in UASTExtractor.ProcessedFiles for the files whose
+	// detected language is not listed in UASTExtractor.Languages.
+	UAST_EXTRACTION_SKIPPED = -(1 << 31)
+
+	// The keys in facts which are read by UASTExtractor.Configure().
+	ConfigUASTEndpoint     = "UAST.Endpoint"
+	ConfigUASTTimeout      = "UAST.Timeout"
+	ConfigUASTPoolSize     = "UAST.PoolSize"
+	ConfigUASTFailOnErrors = "UAST.FailOnErrors"
+	ConfigUASTLanguages    = "UAST.Languages"
+)
+
 type uastTask struct {
 type uastTask struct {
 	Client *bblfsh.BblfshClient
 	Client *bblfsh.BblfshClient
 	Lock   *sync.RWMutex
 	Lock   *sync.RWMutex
@@ -41,7 +56,7 @@ type uastTask struct {
 
 
 type worker struct {
 type worker struct {
 	Client *bblfsh.BblfshClient
 	Client *bblfsh.BblfshClient
-	Job func(interface{}) interface{}
+	Job    func(interface{}) interface{}
 }
 }
 
 
 func (w worker) TunnyReady() bool {
 func (w worker) TunnyReady() bool {
@@ -73,20 +88,58 @@ func (exr *UASTExtractor) Features() []string {
 	return arr[:]
 	return arr[:]
 }
 }
 
 
-func (exr *UASTExtractor) Construct(facts map[string]interface{}) {
-	if val, exists := facts["UAST.Endpoint"].(string); exists {
+// ListConfigurationOptions returns the options of UASTExtractor: the Babelfish server
+// address, the request timeout, the size of the extraction pool, the error policy and
+// the list of programming languages. Each Name is a key read back by Configure().
+func (exr *UASTExtractor) ListConfigurationOptions() []ConfigurationOption {
+	options := [...]ConfigurationOption{{
+		Name:        ConfigUASTEndpoint,
+		Description: "Babelfish server's address.",
+		Flag:        "bblfsh",
+		Type:        StringConfigurationOption,
+		Default:     "0.0.0.0:9432"}, {
+		Name:        ConfigUASTTimeout,
+		Description: "Babelfish's server timeout in seconds.",
+		Flag:        "bblfsh-timeout",
+		Type:        IntConfigurationOption,
+		Default:     20}, {
+		Name:        ConfigUASTPoolSize,
+		Description: "Number of goroutines to extract UASTs.",
+		Flag:        "bblfsh-pool-size",
+		Type:        IntConfigurationOption,
+		Default:     runtime.NumCPU()}, {
+		Name:        ConfigUASTFailOnErrors,
+		Description: "Panic if there is a UAST extraction error.",
+		Flag:        "bblfsh-fail-on-error",
+		Type:        BoolConfigurationOption,
+		Default:     false}, {
+		Name:        ConfigUASTLanguages,
+		Description: "Programming languages from which to extract UASTs. Separated by comma \",\".",
+		Flag:        "languages",
+		Type:        StringConfigurationOption,
+		Default:     "Python,Java"},
+	}
+	return options[:]
+}
+
+// Configure applies the values found in facts under the ConfigUAST* keys. A key which is
+// absent or holds a value of an unexpected type leaves the corresponding field untouched.
+func (exr *UASTExtractor) Configure(facts map[string]interface{}) {
+	if val, exists := facts[ConfigUASTEndpoint].(string); exists {
 		exr.Endpoint = val
 		exr.Endpoint = val
 	}
 	}
-	if val, exists := facts["UAST.Context"].(func() context.Context); exists {
-		exr.Context = val
+	if val, exists := facts[ConfigUASTTimeout].(int); exists {
+		// Every call produces a fresh context which expires after val seconds.
+		// NOTE(review): the cancel function is discarded because Context can only return
+		// the context itself, so the timer is released only upon the expiration.
+		exr.Context = func() context.Context {
+			ctx, _ := context.WithTimeout(context.Background(),
+				time.Duration(val)*time.Second)
+			return ctx
+		}
 	}
 	}
-	if val, exists := facts["UAST.PoolSize"].(int); exists {
+	if val, exists := facts[ConfigUASTPoolSize].(int); exists {
 		exr.PoolSize = val
 		exr.PoolSize = val
 	}
 	}
-	if val, exists := facts["UAST.Extensions"].(map[string]bool); exists {
-		exr.Extensions = val
+	if val, exists := facts[ConfigUASTLanguages].(string); exists {
+		exr.Languages = map[string]bool{}
+		for _, lang := range strings.Split(val, ",") {
+			// Tolerate spaces around the commas ("Python, Java") and skip empty entries.
+			if lang = strings.TrimSpace(lang); lang != "" {
+				exr.Languages[lang] = true
+			}
+		}
 	}
 	}
-	if val, exists := facts["UAST.FailOnErrors"].(bool); exists {
+	if val, exists := facts[ConfigUASTFailOnErrors].(bool); exists {
 		exr.FailOnErrors = val
 		exr.FailOnErrors = val
 	}
 	}
 }
 }
@@ -119,6 +172,7 @@ func (exr *UASTExtractor) Initialize(repository *git.Repository) {
 	if err != nil {
 	if err != nil {
 		panic(err)
 		panic(err)
 	}
 	}
+	exr.ProcessedFiles = map[string]int{}
 }
 }
 
 
 func (exr *UASTExtractor) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
 func (exr *UASTExtractor) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
@@ -130,16 +184,25 @@ func (exr *UASTExtractor) Consume(deps map[string]interface{}) (map[string]inter
 	status := make(chan int)
 	status := make(chan int)
 	pending := 0
 	pending := 0
 	submit := func(change *object.Change) {
 	submit := func(change *object.Change) {
-		var ext string
-		dotpos := strings.LastIndex(change.To.Name, ".")
-		if dotpos >= 0 {
-			ext = change.To.Name[dotpos + 1:]
-		} else {
-			ext = change.To.Name
-		}
-		_, exists := exr.Extensions[ext]
-		if !exists {
-			return
+		{
+			reader, err := cache[change.To.TreeEntry.Hash].Reader()
+			if err != nil {
+				errs = append(errs, err)
+				return
+			}
+			defer ioutil.CheckClose(reader, &err)
+
+			buf := new(bytes.Buffer)
+			if _, err := buf.ReadFrom(reader); err != nil {
+				errs = append(errs, err)
+				return
+			}
+			lang, _ := enry.GetLanguageByContent(change.To.Name, buf.Bytes())
+			if _, exists := exr.Languages[lang]; !exists {
+				exr.ProcessedFiles[change.To.Name] = UAST_EXTRACTION_SKIPPED
+				return
+			}
+			exr.ProcessedFiles[change.To.Name]++
 		}
 		}
 		pending++
 		pending++
 		exr.pool.SendWorkAsync(uastTask{
 		exr.pool.SendWorkAsync(uastTask{
@@ -180,12 +243,8 @@ func (exr *UASTExtractor) Consume(deps map[string]interface{}) (map[string]inter
 	return map[string]interface{}{"uasts": uasts}, nil
 	return map[string]interface{}{"uasts": uasts}, nil
 }
 }
 
 
-func (exr *UASTExtractor) Finalize() interface{} {
-	return nil
-}
-
 func (exr *UASTExtractor) extractUAST(
 func (exr *UASTExtractor) extractUAST(
-		client *bblfsh.BblfshClient, file *object.File) (*uast.Node, error) {
+	client *bblfsh.BblfshClient, file *object.File) (*uast.Node, error) {
 	request := client.NewParseRequest()
 	request := client.NewParseRequest()
 	contents, err := file.Contents()
 	contents, err := file.Contents()
 	if err != nil {
 	if err != nil {
@@ -225,12 +284,12 @@ func (exr *UASTExtractor) extractTask(data interface{}) interface{} {
 
 
 type UASTChange struct {
 type UASTChange struct {
 	Before *uast.Node
 	Before *uast.Node
-	After *uast.Node
+	After  *uast.Node
 	Change *object.Change
 	Change *object.Change
 }
 }
 
 
 type UASTChanges struct {
 type UASTChanges struct {
-  cache map[plumbing.Hash]*uast.Node
+	cache map[plumbing.Hash]*uast.Node
 }
 }
 
 
 func (uc *UASTChanges) Name() string {
 func (uc *UASTChanges) Name() string {
@@ -252,14 +311,18 @@ func (uc *UASTChanges) Features() []string {
 	return arr[:]
 	return arr[:]
 }
 }
 
 
-func (uc *UASTChanges) Construct(facts map[string]interface{}) {}
+// ListConfigurationOptions returns an empty list: UASTChanges has nothing to configure.
+func (uc *UASTChanges) ListConfigurationOptions() []ConfigurationOption {
+	none := []ConfigurationOption{}
+	return none
+}
+
+// Configure does nothing: UASTChanges does not read anything from facts.
+func (uc *UASTChanges) Configure(facts map[string]interface{}) {
+}
 
 
 func (uc *UASTChanges) Initialize(repository *git.Repository) {
 func (uc *UASTChanges) Initialize(repository *git.Repository) {
 	uc.cache = map[plumbing.Hash]*uast.Node{}
 	uc.cache = map[plumbing.Hash]*uast.Node{}
 }
 }
 
 
 func (uc *UASTChanges) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
 func (uc *UASTChanges) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
-  uasts := deps["uasts"].(map[plumbing.Hash]*uast.Node)
+	uasts := deps["uasts"].(map[plumbing.Hash]*uast.Node)
 	treeDiffs := deps["changes"].(object.Changes)
 	treeDiffs := deps["changes"].(object.Changes)
 	commit := make([]UASTChange, 0, len(treeDiffs))
 	commit := make([]UASTChange, 0, len(treeDiffs))
 	for _, change := range treeDiffs {
 	for _, change := range treeDiffs {
@@ -289,12 +352,8 @@ func (uc *UASTChanges) Consume(deps map[string]interface{}) (map[string]interfac
 	return map[string]interface{}{"changed_uasts": commit}, nil
 	return map[string]interface{}{"changed_uasts": commit}, nil
 }
 }
 
 
-func (uc *UASTChanges) Finalize() interface{} {
-	return nil
-}
-
 type UASTChangesSaver struct {
 type UASTChangesSaver struct {
-  result [][]UASTChange
+	result [][]UASTChange
 }
 }
 
 
 func (saver *UASTChangesSaver) Name() string {
 func (saver *UASTChangesSaver) Name() string {
@@ -315,7 +374,11 @@ func (saver *UASTChangesSaver) Features() []string {
 	return arr[:]
 	return arr[:]
 }
 }
 
 
-func (saver *UASTChangesSaver) Construct(facts map[string]interface{}) {}
+// ListConfigurationOptions returns an empty list: UASTChangesSaver has nothing to configure.
+func (saver *UASTChangesSaver) ListConfigurationOptions() []ConfigurationOption {
+	none := []ConfigurationOption{}
+	return none
+}
+
+// Configure does nothing: UASTChangesSaver does not read anything from facts.
+func (saver *UASTChangesSaver) Configure(facts map[string]interface{}) {
+}
 
 
 func (saver *UASTChangesSaver) Initialize(repository *git.Repository) {
 func (saver *UASTChangesSaver) Initialize(repository *git.Repository) {
 	saver.result = [][]UASTChange{}
 	saver.result = [][]UASTChange{}
@@ -332,7 +395,7 @@ func (saver *UASTChangesSaver) Finalize() interface{} {
 }
 }
 
 
 func init() {
 func init() {
-  Registry.Register(&UASTExtractor{})
+	Registry.Register(&UASTExtractor{})
 	Registry.Register(&UASTChanges{})
 	Registry.Register(&UASTChanges{})
 	Registry.Register(&UASTChangesSaver{})
 	Registry.Register(&UASTChangesSaver{})
 }
 }

+ 3 - 8
uast_test.go

@@ -4,14 +4,15 @@ import (
 	"testing"
 	"testing"
 
 
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/assert"
-	"gopkg.in/src-d/go-git.v4/plumbing/object"
-	"gopkg.in/src-d/go-git.v4/plumbing"
 	"gopkg.in/bblfsh/sdk.v1/uast"
 	"gopkg.in/bblfsh/sdk.v1/uast"
+	"gopkg.in/src-d/go-git.v4/plumbing"
+	"gopkg.in/src-d/go-git.v4/plumbing/object"
 )
 )
 
 
 func fixtureUASTExtractor() *UASTExtractor {
 func fixtureUASTExtractor() *UASTExtractor {
 	exr := UASTExtractor{Endpoint: "0.0.0.0:9432"}
 	exr := UASTExtractor{Endpoint: "0.0.0.0:9432"}
 	exr.Initialize(testRepository)
 	exr.Initialize(testRepository)
+	exr.Languages["Python"] = true
 	return &exr
 	return &exr
 }
 }
 
 
@@ -25,12 +26,6 @@ func TestUASTExtractorMeta(t *testing.T) {
 	assert.Equal(t, exr.Requires()[1], "blob_cache")
 	assert.Equal(t, exr.Requires()[1], "blob_cache")
 }
 }
 
 
-func TestUASTExtractorFinalize(t *testing.T) {
-	exr := fixtureUASTExtractor()
-	r := exr.Finalize()
-	assert.Nil(t, r)
-}
-
 func TestUASTExtractorConsume(t *testing.T) {
 func TestUASTExtractorConsume(t *testing.T) {
 	exr := fixtureUASTExtractor()
 	exr := fixtureUASTExtractor()
 	changes := make(object.Changes, 2)
 	changes := make(object.Changes, 2)