Browse Source

Merge pull request #21 from src-d/uast

v2 -> v3
Vadim Markovtsev 7 years ago
parent
commit
46dff379ed
40 changed files with 3848 additions and 755 deletions
  1. .gitignore (+3 -0)
  2. .travis.yml (+26 -7)
  3. Makefile (+22 -0)
  4. README.md (+54 -39)
  5. blob_cache.go (+26 -5)
  6. blob_cache_test.go (+26 -10)
  7. burndown.go (+165 -3)
  8. burndown_test.go (+278 -4)
  9. cmd/hercules/main.go (+153 -204)
  10. couples.go (+168 -9)
  11. couples_test.go (+109 -7)
  12. day.go (+8 -2)
  13. day_test.go (+10 -4)
  14. diff.go (+12 -7)
  15. diff_refiner.go (+57 -0)
  16. diff_test.go (+17 -7)
  17. doc.go (+9 -19)
  18. doc/dag.dot (+27 -0)
  19. doc/dag.png (BIN)
  20. file.go (+1 -1)
  21. file_test.go (+1 -1)
  22. identity.go (+55 -10)
  23. identity_test.go (+104 -19)
  24. labours.py (+95 -35)
  25. pb/pb.pb.go (+211 -95)
  26. pb/pb.proto (+38 -20)
  27. pb/pb_pb2.py (+292 -58)
  28. pb/utils.go (+13 -3)
  29. pipeline.go (+416 -28)
  30. pipeline_test.go (+203 -13)
  31. renames.go (+34 -7)
  32. renames_test.go (+32 -11)
  33. stdout/utils.go (+7 -101)
  34. toposort/toposort.go (+200 -13)
  35. toposort/toposort_test.go (+150 -5)
  36. tree_diff.go (+8 -2)
  37. tree_diff_test.go (+17 -6)
  38. uast.go (+497 -0)
  39. uast_test.go (+301 -0)
  40. version.go (+3 -0)

+ 3 - 0
.gitignore

@@ -1,3 +1,6 @@
+**/.DS_Store
+.idea
+
 # Compiled Object files, Static and Dynamic libs (Shared Objects)
 *.o
 *.a

+ 26 - 7
.travis.yml

@@ -2,22 +2,41 @@ dist: trusty
 
 language: go
 
+services:
+  - docker
+
+addons:
+  apt:
+    packages:
+    - libxml2-dev
+    - libcunit1-dev
+
 go:
   - 1.8
   - 1.9
 
-go_import_path: gopkg.in/src-d/hercules.v2
+go_import_path: gopkg.in/src-d/hercules.v3
 
 before_install:
+  - wget http://mirrors.kernel.org/ubuntu/pool/main/m/make-dfsg/make_4.1-9.1_amd64.deb
+  - dpkg -x make_4.1-9.1_amd64.deb ~ && rm make_4.1-9.1_amd64.deb
   - wget https://bootstrap.pypa.io/get-pip.py && python3 get-pip.py --user && rm get-pip.py
-  - export PATH=$PATH:~/.local/bin
-  - pip3 install --user -r requirements.txt
-  - pip3 install --user tensorflow
+  - export PATH=~/.local/bin:~/usr/bin:$PATH
+  - make --version
+  - pip3 --version
+
+install:
+  - make dependencies
+  - go get -t -v -ldflags "-X gopkg.in/src-d/hercules.v3.GIT_HASH=$(git rev-parse HEAD)" ./...
+  - pip3 install --user -r requirements.txt tensorflow
+  - docker run -d --privileged -p 9432:9432 --name bblfshd bblfsh/bblfshd
+  - docker exec -it bblfshd bblfshctl driver install --all
   
 script:
-  - go test -v -cpu=1,2 -coverprofile=coverage.txt -covermode=count gopkg.in/src-d/hercules.v2
-  - $GOPATH/bin/hercules -files -people -couples https://github.com/src-d/hercules | python3 labours.py -m all -o out --backend Agg --disable-projector
-  - $GOPATH/bin/hercules -files -people -couples -pb https://github.com/src-d/hercules | python3 labours.py -f pb -m all -o out --backend Agg --disable-projector
+  - go test -v -cpu=1,2 -coverprofile=coverage.txt -covermode=count gopkg.in/src-d/hercules.v3
+  - $GOPATH/bin/hercules -version
+  - $GOPATH/bin/hercules -burndown -burndown-files -burndown-people -couples -quiet https://github.com/src-d/hercules | python3 labours.py -m all -o out --backend Agg --disable-projector
+  - $GOPATH/bin/hercules -burndown -burndown-files -burndown-people -couples -quiet -pb https://github.com/src-d/hercules | python3 labours.py -f pb -m all -o out --backend Agg --disable-projector
 
 after_success:
   - bash <(curl -s https://codecov.io/bash)

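The new CI steps start a bblfshd server in Docker and install its language drivers; this is the Babelfish backend that the new uast.go (added below, +497 lines) connects to. A minimal sketch of such a connection with client-go v2, following that library's documented interface; the endpoint matches the `-p 9432:9432` port mapping above, but treat the exact calls as assumptions rather than hercules code:

```go
package main

import (
	"fmt"

	"gopkg.in/bblfsh/client-go.v2"
)

func main() {
	// bblfshd is expected to listen on port 9432, as published by the
	// `docker run -p 9432:9432 ... bblfsh/bblfshd` step above.
	client, err := bblfsh.NewClient("0.0.0.0:9432")
	if err != nil {
		panic(err)
	}
	// Parse a snippet into a Universal Abstract Syntax Tree (UAST).
	response, err := client.NewParseRequest().
		Language("go").
		Filename("example.go").
		Content("package main\n\nfunc main() {}\n").
		Do()
	if err != nil {
		panic(err)
	}
	fmt.Println(response.UAST)
}
```
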
+ 22 - 0
Makefile

@@ -0,0 +1,22 @@
+ifneq (oneshell, $(findstring oneshell, $(.FEATURES)))
+  $(error GNU make 3.82 or later is required)
+endif
+
+all: dependencies ${GOPATH}/bin/hercules
+
+test: all
+	go test gopkg.in/src-d/hercules.v3
+
+dependencies: ${GOPATH}/src/gopkg.in/bblfsh/client-go.v2
+
+.ONESHELL:
+${GOPATH}/src/gopkg.in/bblfsh/client-go.v2:
+	go get -v gopkg.in/bblfsh/client-go.v2/... || true
+	cd $$GOPATH/src/gopkg.in/bblfsh/client-go.v2
+	make dependencies
+
+.ONESHELL:
+${GOPATH}/bin/hercules: *.go cmd/hercules/*.go rbtree/*.go stdout/*.go toposort/*.go pb/*.go
+	cd ${GOPATH}/src/gopkg.in/src-d/hercules.v3
+	go get -ldflags "-X gopkg.in/src-d/hercules.v3.GIT_HASH=$$(git rev-parse HEAD)" gopkg.in/src-d/hercules.v3/cmd/hercules
+	${GOPATH}/bin/hercules -version

+ 54 - 39
README.md

@@ -1,29 +1,19 @@
 Hercules [![Build Status](https://travis-ci.org/src-d/hercules.svg?branch=master)](https://travis-ci.org/src-d/hercules) [![codecov](https://codecov.io/github/src-d/hercules/coverage.svg)](https://codecov.io/gh/src-d/hercules)
 --------
 
-This project calculates and plots the lines burndown and other fun stats in Git repositories.
-Exactly the same what [git-of-theseus](https://github.com/erikbern/git-of-theseus)
-does actually, but using [go-git](https://github.com/src-d/go-git).
-Why? [source{d}](http://sourced.tech) builds it's own data pipeline to
-process every git repository in the world and the calculation of the
-annual burnout ratio will be embedded into it. `hercules` contains an
-open source implementation of the specific `git blame` flavour on top
-of go-git. Blaming is performed incrementally using the custom RB tree tracking
-algorithm, only the last modification date is recorded.
+Amazingly fast and highly customizable Git repository analysis engine written in Go. Batteries included.
+Powered by [go-git](https://github.com/src-d/go-git) and [Babelfish](https://doc.bblf.sh).
 
 There are two tools: `hercules` and `labours.py`. The first is the program
-written in Go which collects the burndown and other stats from a Git repository.
-The second is the Python script which draws the stack area plots and optionally
-resamples the time series. These two tools are normally used together through
-the pipe. `hercules` prints results in plain text. The first line is four numbers:
-UNIX timestamp which corresponds to the time the repository was created,
-UNIX timestamp of the last commit, *granularity* and *sampling*.
-Granularity is the number of days each band in the stack consists of. Sampling
-is the frequency with which the burnout state is snapshotted. The smaller the
-value, the more smooth is the plot but the more work is done.
+written in Go which takes a Git repository and runs a Directed Acyclic Graph (DAG) of analysis tasks.
+The second is the Python script which draws some predefined plots. These two tools are normally used together through
+a pipe. It is possible to write custom analyses using the plugin system.
+
+![git/git image](doc/dag.png)
+<p align="center">The DAG of burndown and couples analyses with UAST diff refining. Generated with <code>hercules -burndown -burndown-people -couples -feature=uast -dry-run -dump-dag doc/dag.dot https://github.com/src-d/hercules</code></p>
 
 ![git/git image](doc/linux.png)
-<p align="center">torvalds/linux burndown (granularity 30, sampling 30, resampled by year)</p>
+<p align="center">torvalds/linux line burndown (granularity 30, sampling 30, resampled by year)</p>
 
 There is an option to resample the bands inside `labours.py`, so that you can
 define a very precise distribution and visualize it in different ways. Besides,
@@ -35,29 +25,33 @@ There is a [presentation](http://vmarkovtsev.github.io/techtalks-2017-moscow-lig
 ### Installation
 You are going to need Go (>= v1.8) and Python 2 or 3.
 ```
-go get gopkg.in/src-d/hercules.v2/cmd/hercules
-pip install -r requirements.txt
-wget https://github.com/src-d/hercules/raw/master/labours.py
+go get gopkg.in/src-d/hercules.v3/cmd/hercules
+cd $GOPATH/src/gopkg.in/src-d/hercules.v3
+make
 ```
 
+The first command fails with `libuast.h` not found - this is expected. Pretend that nothing has
+happened and carry on.
+
 #### Windows
 Numpy and SciPy are requirements. Install the correct version by downloading the wheel from http://www.lfd.uci.edu/~gohlke/pythonlibs/#scipy.
+Couples analysis also needs Tensorflow.
 
 ### Usage
 ```
-# Use "memory" go-git backend and display the plot. This is the fastest but the repository data must fit into RAM.
-hercules https://github.com/src-d/go-git | python3 labours.py --resample month
-# Use "file system" go-git backend and print the raw data.
+# Use "memory" go-git backend and display the burndown plot. "memory" is the fastest but the repository's git data must fit into RAM.
+hercules -burndown https://github.com/src-d/go-git | python3 labours.py -m project --resample month
+# Use "file system" go-git backend and print some basic information about the repository.
 hercules /path/to/cloned/go-git
-# Use "file system" go-git backend, cache the cloned repository to /tmp/repo-cache, use Protocol Buffers and display the unresampled plot.
-hercules -pb https://github.com/git/git /tmp/repo-cache | python3 labours.py -f pb --resample raw
+# Use "file system" go-git backend, cache the cloned repository to /tmp/repo-cache, use Protocol Buffers and display the burndown plot without resampling.
+hercules -burndown -pb https://github.com/git/git /tmp/repo-cache | python3 labours.py -m project -f pb --resample raw
 
 # Now something fun
 # Get the linear history from git rev-list, reverse it
-# Pipe to hercules, produce the snapshots for every 30 days grouped by 30 days
+# Pipe to hercules, produce burndown snapshots for every 30 days grouped by 30 days
 # Save the raw data to cache.yaml, so that it is later possible to run python3 labours.py -i cache.yaml
 # Pipe the raw data to labours.py, set text font size to 16pt, use Agg matplotlib backend and save the plot to output.png
-git rev-list HEAD | tac | hercules -commits - https://github.com/git/git | tee cache.yaml | python3 labours.py --font-size 16 --backend Agg --output git.png
+git rev-list HEAD | tac | hercules -commits - -burndown https://github.com/git/git | tee cache.yaml | python3 labours.py -m project --font-size 16 --backend Agg --output git.png
 ```
 
 `labours.py -i /path/to/yaml` allows reading the output from `hercules` that was saved on disk.
@@ -72,21 +66,38 @@ corresponding directory instead of cloning from scratch:
 hercules https://github.com/git/git /tmp/repo-cache
 
 # Second time - use the cache
-hercules /tmp/repo-cache
+hercules -some-analysis /tmp/repo-cache
 ```
 
 #### Docker image
 
 ```
-docker run --rm srcd/hercules hercules -pb https://github.com/git/git | docker run --rm -i -v $(pwd):/io srcd/hercules labours.py -f pb -o /io/git_git.png
+docker run --rm srcd/hercules hercules -burndown -pb https://github.com/git/git | docker run --rm -i -v $(pwd):/io srcd/hercules labours.py -f pb -m project -o /io/git_git.png
+```
+
+### Built-in analyses
+
+#### Project burndown
+
+```
+hercules -burndown
+python3 labours.py -m project
 ```
 
-### Extensions
+Line burndown statistics for the whole repository.
+Exactly what [git-of-theseus](https://github.com/erikbern/git-of-theseus)
+does, but much faster. Blaming is performed efficiently and incrementally using a custom RB tree tracking
+algorithm, and only the last modification date is recorded while running the analysis.
+
+All burndown analyses depend on the values of *granularity* and *sampling*.
+Granularity is the number of days each band in the stack consists of. Sampling
+is the frequency with which the burnout state is snapshotted. The smaller the
+value, the smoother the plot, but the more work is done.
 
 #### Files
 
 ```
-hercules -files
+hercules -burndown -burndown-files
 python3 labours.py -m files
 ```
 
@@ -95,11 +106,11 @@ Burndown statistics for every file in the repository which is alive in the lates
 #### People
 
 ```
-hercules -people [-people-dict=/path/to/identities]
+hercules -burndown -burndown-people [-people-dict=/path/to/identities]
 python3 labours.py -m person
 ```
 
-Burndown statistics for developers. If `-people-dict` is not specified, the identities are
+Burndown statistics for the repository's contributors. If `-people-dict` is not specified, the identities are
 discovered by the following algorithm:
 
 0. We start from the root commit towards the HEAD. Emails and names are converted to lower case.
@@ -119,7 +130,7 @@ by `|`. The case is ignored.
 <p align="center">Wireshark top 20 devs - churn matrix</p>
 
 ```
-hercules -people [-people-dict=/path/to/identities]
+hercules -burndown -burndown-people [-people-dict=/path/to/identities]
 python3 labours.py -m churn_matrix
 ```
 
@@ -141,7 +152,7 @@ The sequence of developers is stored in `people_sequence` YAML node.
 <p align="center">Ember.js top 20 devs - code ownership</p>
 
 ```
-hercules -people [-people-dict=/path/to/identities]
+hercules -burndown -burndown-people [-people-dict=/path/to/identities]
 python3 labours.py -m ownership
 ```
 
@@ -174,10 +185,14 @@ can be visualized with t-SNE implemented in TF Projector.
 #### Everything in a single pass
 
 ```
-hercules -files -people -couples [-people-dict=/path/to/identities]
+hercules -burndown -burndown-files -burndown-people -couples [-people-dict=/path/to/identities]
 python3 labours.py -m all
 ```
 
+### Plugins
+
+Hercules has a plugin system and allows running custom analyses. See [PLUGINS.md](PLUGINS.md).
+
 ### Bad unicode errors
 
 YAML does not support the whole range of Unicode characters and the parser on `labours.py` side
@@ -185,7 +200,7 @@ may raise exceptions. Filter the output from `hercules` through `fix_yaml_unicod
 such offending characters.
 
 ```
-hercules -people https://github.com/... | python3 fix_yaml_unicode.py | python3 labours.py -m people
+hercules -burndown -burndown-people https://github.com/... | python3 fix_yaml_unicode.py | python3 labours.py -m people
 ```
 
 ### Plotting

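The rewritten README describes analyses as registered items deployed into a pipeline DAG and run over the commit list. A hedged programmatic sketch of that flow, using the calls visible in the cmd/hercules/main.go diff further down (`NewPipeline`, `Registry.Summon`, `DeployItem`, `Initialize`, `Run`, `LeafPipelineItem.Serialize`); the exact signatures and the "Burndown" summon key are assumptions drawn from this commit:

```go
package main

import (
	"os"

	"gopkg.in/src-d/go-git.v4"
	"gopkg.in/src-d/go-git.v4/storage/memory"
	"gopkg.in/src-d/hercules.v3"
)

func main() {
	// Clone into memory, i.e. the "memory" go-git backend from the README.
	repository, err := git.Clone(memory.NewStorage(), nil, &git.CloneOptions{
		URL: "https://github.com/src-d/hercules",
	})
	if err != nil {
		panic(err)
	}
	pipeline := hercules.NewPipeline(repository)
	// Deploy the built-in burndown analysis through the registry,
	// the same way main.go resolves the -burndown flag.
	burndown := pipeline.DeployItem(hercules.Registry.Summon("Burndown")[0])
	commits := pipeline.Commits() // rev-list --first-parent, oldest to newest
	pipeline.Initialize(map[string]interface{}{"commits": commits})
	results, err := pipeline.Run(commits)
	if err != nil {
		panic(err)
	}
	// Leaf items serialize their own results; false selects YAML text.
	leaf := burndown.(hercules.LeafPipelineItem)
	leaf.Serialize(results[burndown], false, os.Stdout)
}
```
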
+ 26 - 5
blob_cache.go

@@ -15,9 +15,13 @@ type BlobCache struct {
 	IgnoreMissingSubmodules bool
 
 	repository *git.Repository
-	cache map[plumbing.Hash]*object.Blob
+	cache      map[plumbing.Hash]*object.Blob
 }
 
+const (
+	ConfigBlobCacheIgnoreMissingSubmodules = "BlobCache.IgnoreMissingSubmodules"
+)
+
 func (cache *BlobCache) Name() string {
 	return "BlobCache"
 }
@@ -32,6 +36,23 @@ func (cache *BlobCache) Requires() []string {
 	return arr[:]
 }
 
+func (cache *BlobCache) ListConfigurationOptions() []ConfigurationOption {
+	options := [...]ConfigurationOption{{
+		Name: ConfigBlobCacheIgnoreMissingSubmodules,
+		Description: "Specifies whether to panic if some submodules do not exist and thus " +
+			"the corresponding Git objects cannot be loaded.",
+		Flag:    "ignore-missing-submodules",
+		Type:    BoolConfigurationOption,
+		Default: false}}
+	return options[:]
+}
+
+func (cache *BlobCache) Configure(facts map[string]interface{}) {
+	if val, exists := facts[ConfigBlobCacheIgnoreMissingSubmodules].(bool); exists {
+		cache.IgnoreMissingSubmodules = val
+	}
+}
+
 func (cache *BlobCache) Initialize(repository *git.Repository) {
 	cache.repository = repository
 	cache.cache = map[plumbing.Hash]*object.Blob{}
@@ -97,10 +118,6 @@ func (self *BlobCache) Consume(deps map[string]interface{}) (map[string]interfac
 	return map[string]interface{}{"blob_cache": cache}, nil
 }
 
-func (cache *BlobCache) Finalize() interface{} {
-	return nil
-}
-
 type FileGetter func(path string) (*object.File, error)
 
 func (cache *BlobCache) getBlob(entry *object.ChangeEntry, fileGetter FileGetter) (
@@ -139,3 +156,7 @@ func (cache *BlobCache) getBlob(entry *object.ChangeEntry, fileGetter FileGetter
 	}
 	return blob, nil
 }
+
+func init() {
+	Registry.Register(&BlobCache{})
+}

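The blob_cache.go diff shows the v3 item contract in miniature: a `Config...` constant per option, `ListConfigurationOptions` describing the command-line flag, `Configure` reading typed values from the facts map, and self-registration in `init()`. A hedged sketch of a hypothetical item following the same pattern (`MyItem` and its option are invented for illustration; `ConfigurationOption`, `BoolConfigurationOption`, and `Registry` are the types this commit introduces):

```go
package hercules

import (
	"gopkg.in/src-d/go-git.v4"
)

// ConfigMyItemVerbose is a hypothetical option name following the
// "<Item>.<Option>" convention seen in BlobCache above.
const ConfigMyItemVerbose = "MyItem.Verbose"

// MyItem is a hypothetical pipeline item illustrating the pattern.
type MyItem struct {
	Verbose bool
}

func (item *MyItem) Name() string       { return "MyItem" }
func (item *MyItem) Provides() []string { return []string{} }
func (item *MyItem) Requires() []string { return []string{} }

func (item *MyItem) ListConfigurationOptions() []ConfigurationOption {
	return []ConfigurationOption{{
		Name:        ConfigMyItemVerbose,
		Description: "Print extra diagnostics.",
		Flag:        "my-item-verbose",
		Type:        BoolConfigurationOption,
		Default:     false}}
}

func (item *MyItem) Configure(facts map[string]interface{}) {
	// The comma-ok type assertion doubles as a type check: an absent
	// or mistyped fact leaves the previous value untouched.
	if val, exists := facts[ConfigMyItemVerbose].(bool); exists {
		item.Verbose = val
	}
}

func (item *MyItem) Initialize(repository *git.Repository) {}

func (item *MyItem) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
	return nil, nil
}

func init() {
	Registry.Register(&MyItem{})
}
```
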
+ 26 - 10
blob_cache_test.go

@@ -17,9 +17,17 @@ func fixtureBlobCache() *BlobCache {
 	return cache
 }
 
-func TestBlobCacheInitialize(t *testing.T) {
+func TestBlobCacheConfigureInitialize(t *testing.T) {
 	cache := fixtureBlobCache()
 	assert.Equal(t, testRepository, cache.repository)
+	assert.False(t, cache.IgnoreMissingSubmodules)
+	facts := map[string]interface{}{}
+	facts[ConfigBlobCacheIgnoreMissingSubmodules] = true
+	cache.Configure(facts)
+	assert.True(t, cache.IgnoreMissingSubmodules)
+	facts = map[string]interface{}{}
+	cache.Configure(facts)
+	assert.True(t, cache.IgnoreMissingSubmodules)
 }
 
 func TestBlobCacheMetadata(t *testing.T) {
@@ -30,6 +38,19 @@ func TestBlobCacheMetadata(t *testing.T) {
 	assert.Equal(t, len(cache.Requires()), 1)
 	changes := &TreeDiff{}
 	assert.Equal(t, cache.Requires()[0], changes.Provides()[0])
+	opts := cache.ListConfigurationOptions()
+	assert.Len(t, opts, 1)
+	assert.Equal(t, opts[0].Name, ConfigBlobCacheIgnoreMissingSubmodules)
+}
+
+func TestBlobCacheRegistration(t *testing.T) {
+	tp, exists := Registry.registered[(&BlobCache{}).Name()]
+	assert.True(t, exists)
+	assert.Equal(t, tp.Elem().Name(), "BlobCache")
+	tps, exists := Registry.provided[(&BlobCache{}).Provides()[0]]
+	assert.True(t, exists)
+	assert.Len(t, tps, 1)
+	assert.Equal(t, tps[0].Elem().Name(), "BlobCache")
 }
 
 func TestBlobCacheConsumeModification(t *testing.T) {
@@ -184,10 +205,10 @@ func TestBlobCacheConsumeBadHashes(t *testing.T) {
 	assert.Nil(t, err)
 	changes[0] = &object.Change{From: object.ChangeEntry{},
 		To: object.ChangeEntry{
-		Name:      "labours.py",
-		Tree:      treeTo,
-		TreeEntry: object.TreeEntry{},
-	}}
+			Name:      "labours.py",
+			Tree:      treeTo,
+			TreeEntry: object.TreeEntry{},
+		}}
 	result, err = fixtureBlobCache().Consume(deps)
 	assert.Nil(t, result)
 	assert.NotNil(t, err)
@@ -222,11 +243,6 @@ func TestBlobCacheConsumeInvalidHash(t *testing.T) {
 	assert.NotNil(t, err)
 }
 
-func TestBlobCacheFinalize(t *testing.T) {
-	outcome := fixtureBlobCache().Finalize()
-	assert.Nil(t, outcome)
-}
-
 func TestBlobCacheGetBlob(t *testing.T) {
 	cache := fixtureBlobCache()
 	treeFrom, _ := testRepository.TreeObject(plumbing.NewHash(

+ 165 - 3
burndown.go

@@ -6,13 +6,17 @@ import (
 	"fmt"
 	"io"
 	"os"
+	"sort"
 	"unicode/utf8"
 
+	"github.com/gogo/protobuf/proto"
 	"github.com/sergi/go-diff/diffmatchpatch"
 	"gopkg.in/src-d/go-git.v4"
 	"gopkg.in/src-d/go-git.v4/plumbing"
 	"gopkg.in/src-d/go-git.v4/plumbing/object"
 	"gopkg.in/src-d/go-git.v4/utils/merkletrie"
+	"gopkg.in/src-d/hercules.v3/pb"
+	"gopkg.in/src-d/hercules.v3/stdout"
 )
 
 // BurndownAnalyser allows to gather the line burndown statistics for a Git repository.
@@ -60,6 +64,8 @@ type BurndownAnalysis struct {
 	// previousDay is the day from the previous sample period -
 	// different from DaysSinceStart.previousDay.
 	previousDay int
+	// references IdentityDetector.ReversedPeopleDict
+	reversedPeopleDict []string
 }
 
 type BurndownResult struct {
@@ -69,6 +75,14 @@ type BurndownResult struct {
 	PeopleMatrix    [][]int64
 }
 
+const (
+	ConfigBurndownGranularity = "Burndown.Granularity"
+	ConfigBurndownSampling    = "Burndown.Sampling"
+	ConfigBurndownTrackFiles  = "Burndown.TrackFiles"
+	ConfigBurndownTrackPeople = "Burndown.TrackPeople"
+	ConfigBurndownDebug       = "Burndown.Debug"
+)
+
 func (analyser *BurndownAnalysis) Name() string {
 	return "Burndown"
 }
@@ -78,11 +92,77 @@ func (analyser *BurndownAnalysis) Provides() []string {
 }
 
 func (analyser *BurndownAnalysis) Requires() []string {
-	arr := [...]string{"file_diff", "renamed_changes", "blob_cache", "day", "author"}
+	arr := [...]string{"file_diff", "changes", "blob_cache", "day", "author"}
 	return arr[:]
 }
 
+func (analyser *BurndownAnalysis) ListConfigurationOptions() []ConfigurationOption {
+	options := [...]ConfigurationOption{{
+		Name:        ConfigBurndownGranularity,
+		Description: "How many days there are in a single band.",
+		Flag:        "granularity",
+		Type:        IntConfigurationOption,
+		Default:     30}, {
+		Name:        ConfigBurndownSampling,
+		Description: "How frequently to record the state in days.",
+		Flag:        "sampling",
+		Type:        IntConfigurationOption,
+		Default:     30}, {
+		Name:        ConfigBurndownTrackFiles,
+		Description: "Record detailed statistics per each file.",
+		Flag:        "burndown-files",
+		Type:        BoolConfigurationOption,
+		Default:     false}, {
+		Name:        ConfigBurndownTrackPeople,
+		Description: "Record detailed statistics per each developer.",
+		Flag:        "burndown-people",
+		Type:        BoolConfigurationOption,
+		Default:     false}, {
+		Name:        ConfigBurndownDebug,
+		Description: "Validate the trees on each step.",
+		Flag:        "burndown-debug",
+		Type:        BoolConfigurationOption,
+		Default:     false},
+	}
+	return options[:]
+}
+
+func (analyser *BurndownAnalysis) Configure(facts map[string]interface{}) {
+	if val, exists := facts[ConfigBurndownGranularity].(int); exists {
+		analyser.Granularity = val
+	}
+	if val, exists := facts[ConfigBurndownSampling].(int); exists {
+		analyser.Sampling = val
+	}
+	if val, exists := facts[ConfigBurndownTrackFiles].(bool); exists {
+		analyser.TrackFiles = val
+	}
+	if people, exists := facts[ConfigBurndownTrackPeople].(bool); people {
+		if val, exists := facts[FactIdentityDetectorPeopleCount].(int); exists {
+			analyser.PeopleNumber = val
+			analyser.reversedPeopleDict = facts[FactIdentityDetectorReversedPeopleDict].([]string)
+		}
+	} else if exists {
+		analyser.PeopleNumber = 0
+	}
+	if val, exists := facts[ConfigBurndownDebug].(bool); exists {
+		analyser.Debug = val
+	}
+}
+
+func (analyser *BurndownAnalysis) Flag() string {
+	return "burndown"
+}
+
 func (analyser *BurndownAnalysis) Initialize(repository *git.Repository) {
+	if analyser.Granularity <= 0 {
+		fmt.Fprintln(os.Stderr, "Warning: adjusted the granularity to 30 days")
+		analyser.Granularity = 30
+	}
+	if analyser.Sampling <= 0 {
+		fmt.Fprintln(os.Stderr, "Warning: adjusted the sampling to 30 days")
+		analyser.Sampling = 30
+	}
 	analyser.repository = repository
 	analyser.globalStatus = map[int]int64{}
 	analyser.globalHistory = [][]int64{}
@@ -109,7 +189,7 @@ func (analyser *BurndownAnalysis) Consume(deps map[string]interface{}) (map[stri
 		analyser.updateHistories(gs, fss, pss, delta)
 	}
 	cache := deps["blob_cache"].(map[plumbing.Hash]*object.Blob)
-	treeDiffs := deps["renamed_changes"].(object.Changes)
+	treeDiffs := deps["changes"].(object.Changes)
 	fileDiffs := deps["file_diff"].(map[string]FileDiffData)
 	for _, change := range treeDiffs {
 		action, err := change.Action()
@@ -167,7 +247,85 @@ func (analyser *BurndownAnalysis) Finalize() interface{} {
 		GlobalHistory:   analyser.globalHistory,
 		FileHistories:   analyser.fileHistories,
 		PeopleHistories: analyser.peopleHistories,
-		PeopleMatrix:    peopleMatrix}
+		PeopleMatrix:    peopleMatrix,
+	}
+}
+
+func (analyser *BurndownAnalysis) Serialize(result interface{}, binary bool, writer io.Writer) error {
+	burndownResult := result.(BurndownResult)
+	if binary {
+		return analyser.serializeBinary(&burndownResult, writer)
+	}
+	analyser.serializeText(&burndownResult, writer)
+	return nil
+}
+
+func (analyser *BurndownAnalysis) serializeText(result *BurndownResult, writer io.Writer) {
+	fmt.Fprintln(writer, "  granularity:", analyser.Granularity)
+	fmt.Fprintln(writer, "  sampling:", analyser.Sampling)
+	stdout.PrintMatrix(writer, result.GlobalHistory, 2, "project", true)
+	if len(result.FileHistories) > 0 {
+		fmt.Fprintln(writer, "  files:")
+		keys := sortedKeys(result.FileHistories)
+		for _, key := range keys {
+			stdout.PrintMatrix(writer, result.FileHistories[key], 4, key, true)
+		}
+	}
+
+	if len(result.PeopleHistories) > 0 {
+		fmt.Fprintln(writer, "  people_sequence:")
+		for key := range result.PeopleHistories {
+			fmt.Fprintln(writer, "    - "+stdout.SafeString(analyser.reversedPeopleDict[key]))
+		}
+		fmt.Fprintln(writer, "  people:")
+		for key, val := range result.PeopleHistories {
+			stdout.PrintMatrix(writer, val, 4, analyser.reversedPeopleDict[key], true)
+		}
+		fmt.Fprintln(writer, "  people_interaction: |-")
+		stdout.PrintMatrix(writer, result.PeopleMatrix, 4, "", false)
+	}
+}
+
+func (analyser *BurndownAnalysis) serializeBinary(result *BurndownResult, writer io.Writer) error {
+	message := pb.BurndownAnalysisResults{
+		Granularity: int32(analyser.Granularity),
+		Sampling:    int32(analyser.Sampling),
+		Project:     pb.ToBurndownSparseMatrix(result.GlobalHistory, "project"),
+	}
+	if len(result.FileHistories) > 0 {
+		message.Files = make([]*pb.BurndownSparseMatrix, len(result.FileHistories))
+		keys := sortedKeys(result.FileHistories)
+		i := 0
+		for _, key := range keys {
+			message.Files[i] = pb.ToBurndownSparseMatrix(
+				result.FileHistories[key], key)
+			i++
+		}
+	}
+
+	if len(result.PeopleHistories) > 0 {
+		message.People = make(
+			[]*pb.BurndownSparseMatrix, len(result.PeopleHistories))
+		for key, val := range result.PeopleHistories {
+			message.People[key] = pb.ToBurndownSparseMatrix(val, analyser.reversedPeopleDict[key])
+		}
+		message.PeopleInteraction = pb.DenseToCompressedSparseRowMatrix(result.PeopleMatrix)
+	}
+	serialized, err := proto.Marshal(&message)
+	if err != nil {
+		return err
+	}
+	writer.Write(serialized)
+	return nil
+}
+
+func sortedKeys(m map[string][][]int64) []string {
+	keys := make([]string, 0, len(m))
+	for k := range m {
+		keys = append(keys, k)
+	}
+	sort.Strings(keys)
+	return keys
 }
 
 func checkClose(c io.Closer) {
@@ -531,3 +689,7 @@ func (analyser *BurndownAnalysis) updateHistories(
 		analyser.peopleHistories[key] = ph
 	}
 }
+
+func init() {
+	Registry.Register(&BurndownAnalysis{})
+}

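`Serialize` is the new leaf-item contract: one result object from `Finalize`, two renderings selected by the `binary` flag. A hedged usage sketch mirroring what burndown_test.go below does; `dumpBurndown` is a hypothetical helper and assumes the analysis has already consumed every commit:

```go
package hercules

import (
	"bytes"
	"fmt"
	"os"

	"github.com/gogo/protobuf/proto"
	"gopkg.in/src-d/hercules.v3/pb"
)

func dumpBurndown(burndown *BurndownAnalysis) error {
	result := burndown.Finalize()
	// Text mode (binary=false): an indented YAML fragment, as embedded
	// under the item's name by cmd/hercules/main.go.
	if err := burndown.Serialize(result, false, os.Stdout); err != nil {
		return err
	}
	// Binary mode (binary=true): a pb.BurndownAnalysisResults message.
	buffer := &bytes.Buffer{}
	if err := burndown.Serialize(result, true, buffer); err != nil {
		return err
	}
	message := pb.BurndownAnalysisResults{}
	if err := proto.Unmarshal(buffer.Bytes(), &message); err != nil {
		return err
	}
	fmt.Println("granularity:", message.Granularity, "sampling:", message.Sampling)
	return nil
}
```
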
+ 278 - 4
burndown_test.go

@@ -1,22 +1,76 @@
 package hercules
 
 import (
+	"bytes"
+	"io"
 	"testing"
 
+	"github.com/gogo/protobuf/proto"
 	"github.com/stretchr/testify/assert"
 	"gopkg.in/src-d/go-git.v4/plumbing"
 	"gopkg.in/src-d/go-git.v4/plumbing/object"
-	"io"
+	"gopkg.in/src-d/hercules.v3/pb"
 )
 
 func TestBurndownMeta(t *testing.T) {
 	burndown := BurndownAnalysis{}
 	assert.Equal(t, burndown.Name(), "Burndown")
 	assert.Equal(t, len(burndown.Provides()), 0)
-	required := [...]string{"file_diff", "renamed_changes", "blob_cache", "day", "author"}
+	required := [...]string{"file_diff", "changes", "blob_cache", "day", "author"}
 	for _, name := range required {
 		assert.Contains(t, burndown.Requires(), name)
 	}
+	opts := burndown.ListConfigurationOptions()
+	matches := 0
+	for _, opt := range opts {
+		switch opt.Name {
+		case ConfigBurndownGranularity, ConfigBurndownSampling, ConfigBurndownTrackFiles,
+			ConfigBurndownTrackPeople, ConfigBurndownDebug:
+			matches++
+		}
+	}
+	assert.Len(t, opts, matches)
+	assert.Equal(t, burndown.Flag(), "burndown")
+}
+
+func TestBurndownConfigure(t *testing.T) {
+	burndown := BurndownAnalysis{}
+	facts := map[string]interface{}{}
+	facts[ConfigBurndownGranularity] = 100
+	facts[ConfigBurndownSampling] = 200
+	facts[ConfigBurndownTrackFiles] = true
+	facts[ConfigBurndownTrackPeople] = true
+	facts[ConfigBurndownDebug] = true
+	facts[FactIdentityDetectorPeopleCount] = 5
+	facts[FactIdentityDetectorReversedPeopleDict] = burndown.Requires()
+	burndown.Configure(facts)
+	assert.Equal(t, burndown.Granularity, 100)
+	assert.Equal(t, burndown.Sampling, 200)
+	assert.Equal(t, burndown.TrackFiles, true)
+	assert.Equal(t, burndown.PeopleNumber, 5)
+	assert.Equal(t, burndown.Debug, true)
+	assert.Equal(t, burndown.reversedPeopleDict, burndown.Requires())
+	facts[ConfigBurndownTrackPeople] = false
+	facts[FactIdentityDetectorPeopleCount] = 50
+	burndown.Configure(facts)
+	assert.Equal(t, burndown.PeopleNumber, 0)
+	facts = map[string]interface{}{}
+	burndown.Configure(facts)
+	assert.Equal(t, burndown.Granularity, 100)
+	assert.Equal(t, burndown.Sampling, 200)
+	assert.Equal(t, burndown.TrackFiles, true)
+	assert.Equal(t, burndown.PeopleNumber, 0)
+	assert.Equal(t, burndown.Debug, true)
+	assert.Equal(t, burndown.reversedPeopleDict, burndown.Requires())
+}
+
+func TestBurndownRegistration(t *testing.T) {
+	tp, exists := Registry.registered[(&BurndownAnalysis{}).Name()]
+	assert.True(t, exists)
+	assert.Equal(t, tp.Elem().Name(), "BurndownAnalysis")
+	tp, exists = Registry.flags[(&BurndownAnalysis{}).Flag()]
+	assert.True(t, exists)
+	assert.Equal(t, tp.Elem().Name(), "BurndownAnalysis")
 }
 
 func TestBurndownConsumeFinalize(t *testing.T) {
@@ -84,7 +138,7 @@ func TestBurndownConsumeFinalize(t *testing.T) {
 		},
 	},
 	}
-	deps["renamed_changes"] = changes
+	deps["changes"] = changes
 	fd := fixtureFileDiff()
 	result, err := fd.Consume(deps)
 	assert.Nil(t, err)
@@ -181,7 +235,7 @@ func TestBurndownConsumeFinalize(t *testing.T) {
 		},
 	}, To: object.ChangeEntry{},
 	}
-	deps["renamed_changes"] = changes
+	deps["changes"] = changes
 	fd = fixtureFileDiff()
 	result, err = fd.Consume(deps)
 	assert.Nil(t, err)
@@ -240,6 +294,226 @@ func TestBurndownConsumeFinalize(t *testing.T) {
 	}
 }
 
+func TestBurndownAnalysisSerialize(t *testing.T) {
+	burndown := BurndownAnalysis{
+		Granularity:  30,
+		Sampling:     30,
+		PeopleNumber: 2,
+		TrackFiles:   true,
+	}
+	burndown.Initialize(testRepository)
+	deps := map[string]interface{}{}
+	// stage 1
+	deps["author"] = 0
+	deps["day"] = 0
+	cache := map[plumbing.Hash]*object.Blob{}
+	hash := plumbing.NewHash("291286b4ac41952cbd1389fda66420ec03c1a9fe")
+	cache[hash], _ = testRepository.BlobObject(hash)
+	hash = plumbing.NewHash("c29112dbd697ad9b401333b80c18a63951bc18d9")
+	cache[hash], _ = testRepository.BlobObject(hash)
+	hash = plumbing.NewHash("baa64828831d174f40140e4b3cfa77d1e917a2c1")
+	cache[hash], _ = testRepository.BlobObject(hash)
+	hash = plumbing.NewHash("dc248ba2b22048cc730c571a748e8ffcf7085ab9")
+	cache[hash], _ = testRepository.BlobObject(hash)
+	deps["blob_cache"] = cache
+	changes := make(object.Changes, 3)
+	treeFrom, _ := testRepository.TreeObject(plumbing.NewHash(
+		"a1eb2ea76eb7f9bfbde9b243861474421000eb96"))
+	treeTo, _ := testRepository.TreeObject(plumbing.NewHash(
+		"994eac1cd07235bb9815e547a75c84265dea00f5"))
+	changes[0] = &object.Change{From: object.ChangeEntry{
+		Name: "analyser.go",
+		Tree: treeFrom,
+		TreeEntry: object.TreeEntry{
+			Name: "analyser.go",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("dc248ba2b22048cc730c571a748e8ffcf7085ab9"),
+		},
+	}, To: object.ChangeEntry{
+		Name: "analyser.go",
+		Tree: treeTo,
+		TreeEntry: object.TreeEntry{
+			Name: "analyser.go",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("baa64828831d174f40140e4b3cfa77d1e917a2c1"),
+		},
+	}}
+	changes[1] = &object.Change{From: object.ChangeEntry{}, To: object.ChangeEntry{
+		Name: "cmd/hercules/main.go",
+		Tree: treeTo,
+		TreeEntry: object.TreeEntry{
+			Name: "cmd/hercules/main.go",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("c29112dbd697ad9b401333b80c18a63951bc18d9"),
+		},
+	},
+	}
+	changes[2] = &object.Change{From: object.ChangeEntry{}, To: object.ChangeEntry{
+		Name: ".travis.yml",
+		Tree: treeTo,
+		TreeEntry: object.TreeEntry{
+			Name: ".travis.yml",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("291286b4ac41952cbd1389fda66420ec03c1a9fe"),
+		},
+	},
+	}
+	deps["changes"] = changes
+	fd := fixtureFileDiff()
+	result, _ := fd.Consume(deps)
+	deps["file_diff"] = result["file_diff"]
+	burndown.Consume(deps)
+
+	// stage 2
+	// 2b1ed978194a94edeabbca6de7ff3b5771d4d665
+	deps["author"] = 1
+	deps["day"] = 30
+	cache = map[plumbing.Hash]*object.Blob{}
+	hash = plumbing.NewHash("291286b4ac41952cbd1389fda66420ec03c1a9fe")
+	cache[hash], _ = testRepository.BlobObject(hash)
+	hash = plumbing.NewHash("baa64828831d174f40140e4b3cfa77d1e917a2c1")
+	cache[hash], _ = testRepository.BlobObject(hash)
+	hash = plumbing.NewHash("29c9fafd6a2fae8cd20298c3f60115bc31a4c0f2")
+	cache[hash], _ = testRepository.BlobObject(hash)
+	hash = plumbing.NewHash("c29112dbd697ad9b401333b80c18a63951bc18d9")
+	cache[hash], _ = testRepository.BlobObject(hash)
+	hash = plumbing.NewHash("f7d918ec500e2f925ecde79b51cc007bac27de72")
+	cache[hash], _ = testRepository.BlobObject(hash)
+	deps["blob_cache"] = cache
+	changes = make(object.Changes, 3)
+	treeFrom, _ = testRepository.TreeObject(plumbing.NewHash(
+		"96c6ece9b2f3c7c51b83516400d278dea5605100"))
+	treeTo, _ = testRepository.TreeObject(plumbing.NewHash(
+		"251f2094d7b523d5bcc60e663b6cf38151bf8844"))
+	changes[0] = &object.Change{From: object.ChangeEntry{
+		Name: "analyser.go",
+		Tree: treeFrom,
+		TreeEntry: object.TreeEntry{
+			Name: "analyser.go",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("baa64828831d174f40140e4b3cfa77d1e917a2c1"),
+		},
+	}, To: object.ChangeEntry{
+		Name: "burndown.go",
+		Tree: treeTo,
+		TreeEntry: object.TreeEntry{
+			Name: "burndown.go",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("29c9fafd6a2fae8cd20298c3f60115bc31a4c0f2"),
+		},
+	},
+	}
+	changes[1] = &object.Change{From: object.ChangeEntry{
+		Name: "cmd/hercules/main.go",
+		Tree: treeFrom,
+		TreeEntry: object.TreeEntry{
+			Name: "cmd/hercules/main.go",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("c29112dbd697ad9b401333b80c18a63951bc18d9"),
+		},
+	}, To: object.ChangeEntry{
+		Name: "cmd/hercules/main.go",
+		Tree: treeTo,
+		TreeEntry: object.TreeEntry{
+			Name: "cmd/hercules/main.go",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("f7d918ec500e2f925ecde79b51cc007bac27de72"),
+		},
+	},
+	}
+	changes[2] = &object.Change{From: object.ChangeEntry{
+		Name: ".travis.yml",
+		Tree: treeTo,
+		TreeEntry: object.TreeEntry{
+			Name: ".travis.yml",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("291286b4ac41952cbd1389fda66420ec03c1a9fe"),
+		},
+	}, To: object.ChangeEntry{},
+	}
+	deps["changes"] = changes
+	fd = fixtureFileDiff()
+	result, _ = fd.Consume(deps)
+	deps["file_diff"] = result["file_diff"]
+	burndown.Consume(deps)
+	out := burndown.Finalize().(BurndownResult)
+
+	people := [...]string{"one@srcd", "two@srcd"}
+	burndown.reversedPeopleDict = people[:]
+	buffer := &bytes.Buffer{}
+	burndown.Serialize(out, false, buffer)
+	assert.Equal(t, buffer.String(), `  granularity: 30
+  sampling: 30
+  "project": |-
+    1145    0
+     464  369
+  files:
+    "burndown.go": |-
+      0     0
+      293 250
+    "cmd/hercules/main.go": |-
+      207   0
+      171 119
+  people_sequence:
+    - "one@srcd"
+    - "two@srcd"
+  people:
+    "one@srcd": |-
+      1145    0
+       464    0
+    "two@srcd": |-
+      0     0
+        0 369
+  people_interaction: |-
+    1145    0    0 -681
+     369    0    0    0
+`)
+	buffer = &bytes.Buffer{}
+	burndown.Serialize(out, true, buffer)
+	msg := pb.BurndownAnalysisResults{}
+	proto.Unmarshal(buffer.Bytes(), &msg)
+	assert.Equal(t, msg.Granularity, int32(30))
+	assert.Equal(t, msg.Sampling, int32(30))
+	assert.Equal(t, msg.Project.Name, "project")
+	assert.Equal(t, msg.Project.NumberOfRows, int32(2))
+	assert.Equal(t, msg.Project.NumberOfColumns, int32(2))
+	assert.Len(t, msg.Project.Rows, 2)
+	assert.Len(t, msg.Project.Rows[0].Columns, 1)
+	assert.Equal(t, msg.Project.Rows[0].Columns[0], uint32(1145))
+	assert.Len(t, msg.Project.Rows[1].Columns, 2)
+	assert.Equal(t, msg.Project.Rows[1].Columns[0], uint32(464))
+	assert.Equal(t, msg.Project.Rows[1].Columns[1], uint32(369))
+	assert.Len(t, msg.Files, 2)
+	assert.Equal(t, msg.Files[0].Name, "burndown.go")
+	assert.Equal(t, msg.Files[1].Name, "cmd/hercules/main.go")
+	assert.Len(t, msg.Files[0].Rows, 2)
+	assert.Len(t, msg.Files[0].Rows[0].Columns, 0)
+	assert.Len(t, msg.Files[0].Rows[1].Columns, 2)
+	assert.Equal(t, msg.Files[0].Rows[1].Columns[0], uint32(293))
+	assert.Equal(t, msg.Files[0].Rows[1].Columns[1], uint32(250))
+	assert.Len(t, msg.People, 2)
+	assert.Equal(t, msg.People[0].Name, "one@srcd")
+	assert.Equal(t, msg.People[1].Name, "two@srcd")
+	assert.Len(t, msg.People[0].Rows, 2)
+	assert.Len(t, msg.People[0].Rows[0].Columns, 1)
+	assert.Len(t, msg.People[0].Rows[1].Columns, 1)
+	assert.Equal(t, msg.People[0].Rows[0].Columns[0], uint32(1145))
+	assert.Equal(t, msg.People[0].Rows[1].Columns[0], uint32(464))
+	assert.Len(t, msg.People[1].Rows, 2)
+	assert.Len(t, msg.People[1].Rows[0].Columns, 0)
+	assert.Len(t, msg.People[1].Rows[1].Columns, 2)
+	assert.Equal(t, msg.People[1].Rows[1].Columns[0], uint32(0))
+	assert.Equal(t, msg.People[1].Rows[1].Columns[1], uint32(369))
+	assert.Equal(t, msg.PeopleInteraction.NumberOfRows, int32(2))
+	assert.Equal(t, msg.PeopleInteraction.NumberOfColumns, int32(4))
+	data := [...]int64{1145, -681, 369}
+	assert.Equal(t, msg.PeopleInteraction.Data, data[:])
+	indices := [...]int32{0, 3, 0}
+	assert.Equal(t, msg.PeopleInteraction.Indices, indices[:])
+	indptr := [...]int64{0, 2, 3}
+	assert.Equal(t, msg.PeopleInteraction.Indptr, indptr[:])
+}
+
 type panickingCloser struct {
 }
 

+ 153 - 204
cmd/hercules/main.go

@@ -32,11 +32,13 @@ import (
 	"flag"
 	"fmt"
 	"io"
+	"io/ioutil"
 	"net/http"
 	_ "net/http/pprof"
 	"os"
+	"plugin"
 	"runtime/pprof"
-	"sort"
+	"strconv"
 	"strings"
 
 	"gopkg.in/src-d/go-billy.v3/osfs"
@@ -45,21 +47,14 @@ import (
 	"gopkg.in/src-d/go-git.v4/storage"
 	"gopkg.in/src-d/go-git.v4/storage/filesystem"
 	"gopkg.in/src-d/go-git.v4/storage/memory"
-	"gopkg.in/src-d/hercules.v2"
-	"gopkg.in/src-d/hercules.v2/stdout"
-	"gopkg.in/src-d/hercules.v2/pb"
+	"gopkg.in/src-d/hercules.v3"
+	"gopkg.in/src-d/hercules.v3/pb"
+	"github.com/vbauerster/mpb"
+	"github.com/vbauerster/mpb/decor"
 	"github.com/gogo/protobuf/proto"
+	"golang.org/x/crypto/ssh/terminal"
 )
 
-func sortedKeys(m map[string][][]int64) []string {
-	keys := make([]string, 0, len(m))
-	for k := range m {
-		keys = append(keys, k)
-	}
-	sort.Strings(keys)
-	return keys
-}
-
 type OneLineWriter struct {
 	Writer io.Writer
 }
@@ -77,52 +72,7 @@ func (writer OneLineWriter) Write(p []byte) (n int, err error) {
 	return
 }
 
-func main() {
-	var protobuf bool
-	var withFiles bool
-	var withPeople bool
-	var withCouples bool
-	var people_dict_path string
-	var profile bool
-	var granularity, sampling, similarity_threshold int
-	var commitsFile string
-	var ignoreMissingSubmodules bool
-	var debug bool
-	flag.BoolVar(&withFiles, "files", false, "Output detailed statistics per each file.")
-	flag.BoolVar(&withPeople, "people", false, "Output detailed statistics per each developer.")
-	flag.BoolVar(&withCouples, "couples", false, "Gather the co-occurrence matrix "+
-		"for files and people.")
-	flag.StringVar(&people_dict_path, "people-dict", "", "Path to the developers' email associations.")
-	flag.BoolVar(&profile, "profile", false, "Collect the profile to hercules.pprof.")
-	flag.IntVar(&granularity, "granularity", 30, "How many days there are in a single band.")
-	flag.IntVar(&sampling, "sampling", 30, "How frequently to record the state in days.")
-	flag.IntVar(&similarity_threshold, "M", 90,
-		"A threshold on the similarity index used to detect renames.")
-	flag.BoolVar(&debug, "debug", false, "Validate the trees on each step.")
-	flag.StringVar(&commitsFile, "commits", "", "Path to the text file with the "+
-		"commit history to follow instead of the default rev-list "+
-		"--first-parent. The format is the list of hashes, each hash on a "+
-		"separate line. The first hash is the root.")
-	flag.BoolVar(&ignoreMissingSubmodules, "ignore-missing-submodules", false,
-		"Do not panic on submodules which are not registered..")
-	flag.BoolVar(&protobuf, "pb", false, "The output format will be Protocol Buffers instead of YAML.")
-	flag.Parse()
-	if granularity <= 0 {
-		fmt.Fprint(os.Stderr, "Warning: adjusted the granularity to 1 day\n")
-		granularity = 1
-	}
-	if profile {
-		go http.ListenAndServe("localhost:6060", nil)
-		prof, _ := os.Create("hercules.pprof")
-		pprof.StartCPUProfile(prof)
-		defer pprof.StopCPUProfile()
-	}
-	if len(flag.Args()) == 0 || len(flag.Args()) > 3 {
-		fmt.Fprint(os.Stderr,
-			"Usage: hercules <path to repo or URL> [<disk cache path>]\n")
-		os.Exit(1)
-	}
-	uri := flag.Arg(0)
+func loadRepository(uri string, disableStatus bool) *git.Repository {
 	var repository *git.Repository
 	var backend storage.Storer
 	var err error
@@ -140,12 +90,15 @@ func main() {
 		} else {
 			backend = memory.NewStorage()
 		}
-		fmt.Fprint(os.Stderr, "cloning...\r")
-		repository, err = git.Clone(backend, nil, &git.CloneOptions{
-			URL: uri,
-			Progress: OneLineWriter{Writer: os.Stderr},
-		})
-		fmt.Fprint(os.Stderr, strings.Repeat(" ", 80) + "\r")
+		cloneOptions := &git.CloneOptions{URL: uri}
+		if !disableStatus {
+			fmt.Fprint(os.Stderr, "connecting...\r")
+			cloneOptions.Progress = OneLineWriter{Writer: os.Stderr}
+		}
+		repository, err = git.Clone(backend, nil, cloneOptions)
+		if !disableStatus {
+			fmt.Fprint(os.Stderr, strings.Repeat(" ", 80)+"\r")
+		}
 	} else {
 		if uri[len(uri)-1] == os.PathSeparator {
 			uri = uri[:len(uri)-1]
@@ -155,192 +108,188 @@ func main() {
 	if err != nil {
 		panic(err)
 	}
+	return repository
+}
+
+type arrayPluginFlags map[string]bool
+
+func (apf *arrayPluginFlags) String() string {
+	list := []string{}
+	for key := range *apf {
+		list = append(list, key)
+	}
+	return strings.Join(list, ", ")
+}
+
+func (apf *arrayPluginFlags) Set(value string) error {
+	(*apf)[value] = true
+	return nil
+}
+
+func loadPlugins() {
+	pluginFlags := arrayPluginFlags{}
+	fs := flag.NewFlagSet(os.Args[0], flag.ContinueOnError)
+	fs.SetOutput(ioutil.Discard)
+	pluginFlagName := "plugin"
+	pluginDesc := "Load the specified plugin by the full or relative path. " +
+			"Can be specified multiple times."
+	fs.Var(&pluginFlags, pluginFlagName, pluginDesc)
+	flag.Var(&pluginFlags, pluginFlagName, pluginDesc)
+	fs.Parse(os.Args[1:])
+	for path := range pluginFlags {
+		_, err := plugin.Open(path)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "Failed to load plugin from %s: %s\n", path, err)
+		}
+	}
+}
+
+func main() {
+	loadPlugins()
+	var printVersion, protobuf, profile, disableStatus bool
+	var commitsFile string
+	flag.BoolVar(&profile, "profile", false, "Collect the profile to hercules.pprof.")
+	flag.StringVar(&commitsFile, "commits", "", "Path to the text file with the "+
+		"commit history to follow instead of the default rev-list "+
+		"--first-parent. The format is the list of hashes, each hash on a "+
+		"separate line. The first hash is the root.")
+	flag.BoolVar(&protobuf, "pb", false, "The output format will be Protocol Buffers instead of YAML.")
+	flag.BoolVar(&printVersion, "version", false, "Print version information and exit.")
+	flag.BoolVar(&disableStatus, "quiet", false, "Do not print status updates to stderr.")
+	facts, deployChoices := hercules.Registry.AddFlags()
+	flag.Parse()
+
+	if printVersion {
+		fmt.Printf("Version: 3\nGit:     %s\n", hercules.GIT_HASH)
+		return
+	}
+
+	if profile {
+		go http.ListenAndServe("localhost:6060", nil)
+		prof, _ := os.Create("hercules.pprof")
+		pprof.StartCPUProfile(prof)
+		defer pprof.StopCPUProfile()
+	}
+	if len(flag.Args()) == 0 || len(flag.Args()) > 3 {
+		fmt.Fprint(os.Stderr,
+			"Usage: hercules <path to repo or URL> [<disk cache path>]\n")
+		os.Exit(1)
+	}
+	uri := flag.Arg(0)
+	repository := loadRepository(uri, disableStatus)
 
 	// core logic
 	pipeline := hercules.NewPipeline(repository)
-	pipeline.OnProgress = func(commit, length int) {
-		if commit < length {
-			fmt.Fprintf(os.Stderr, "%d / %d\r", commit, length)
-		} else {
-			fmt.Fprint(os.Stderr, "finalizing...    \r")
+	pipeline.SetFeaturesFromFlags()
+	if terminal.IsTerminal(int(os.Stderr.Fd())) && !disableStatus {
+		progress := mpb.New(mpb.Output(os.Stderr))
+		defer progress.Stop()
+		var bar *mpb.Bar
+		pipeline.OnProgress = func(commit, length int) {
+			if bar == nil {
+				width := len(strconv.Itoa(length))*2 + 3
+				bar = progress.AddBar(int64(length+1),
+					mpb.PrependDecorators(decor.DynamicName(
+						func(stats *decor.Statistics) string {
+							if stats.Current < stats.Total {
+								return fmt.Sprintf("%d / %d", stats.Current, length)
+							}
+							return "finalizing"
+						}, width, 0)),
+					mpb.AppendDecorators(decor.ETA(4, 0)),
+				)
+			}
+			bar.Incr(commit - int(bar.Current()))
 		}
 	}
-	// list of commits belonging to the default branch, from oldest to newest
-	// rev-list --first-parent
+
 	var commits []*object.Commit
 	if commitsFile == "" {
+		// list of commits belonging to the default branch, from oldest to newest
+		// rev-list --first-parent
 		commits = pipeline.Commits()
 	} else {
+		var err error
 		commits, err = hercules.LoadCommitsFromFile(commitsFile, repository)
 		if err != nil {
 			panic(err)
 		}
 	}
-
-	pipeline.AddItem(&hercules.BlobCache{
-		IgnoreMissingSubmodules: ignoreMissingSubmodules,
-	})
-	pipeline.AddItem(&hercules.DaysSinceStart{})
-	pipeline.AddItem(&hercules.RenameAnalysis{SimilarityThreshold: similarity_threshold})
-	pipeline.AddItem(&hercules.TreeDiff{})
-	pipeline.AddItem(&hercules.FileDiff{})
-	id_matcher := &hercules.IdentityDetector{}
-	var peopleCount int
-	if withPeople || withCouples {
-		if people_dict_path != "" {
-			id_matcher.LoadPeopleDict(people_dict_path)
-			peopleCount = len(id_matcher.ReversePeopleDict) - 1
-		} else {
-			id_matcher.GeneratePeopleDict(commits)
-			peopleCount = len(id_matcher.ReversePeopleDict)
+	facts["commits"] = commits
+	deployed := []hercules.PipelineItem{}
+	for name, valPtr := range deployChoices {
+		if *valPtr {
+			deployed = append(deployed, pipeline.DeployItem(hercules.Registry.Summon(name)[0]))
 		}
 	}
-	pipeline.AddItem(id_matcher)
-	burndowner := &hercules.BurndownAnalysis{
-		Granularity:  granularity,
-		Sampling:     sampling,
-		Debug:        debug,
-		TrackFiles:   withFiles,
-		PeopleNumber: peopleCount,
-	}
-	pipeline.AddItem(burndowner)
-	var coupler *hercules.Couples
-	if withCouples {
-		coupler = &hercules.Couples{PeopleNumber: peopleCount}
-		pipeline.AddItem(coupler)
+	pipeline.Initialize(facts)
+	if dryRun, _ := facts[hercules.ConfigPipelineDryRun].(bool); dryRun {
+		return
 	}
-
-	pipeline.Initialize()
-	result, err := pipeline.Run(commits)
+	results, err := pipeline.Run(commits)
 	if err != nil {
 		panic(err)
 	}
-	fmt.Fprint(os.Stderr, "writing...    \r")
-	burndownResults := result[burndowner].(hercules.BurndownResult)
-	var couplesResult hercules.CouplesResult
-	if withCouples {
-		couplesResult = result[coupler].(hercules.CouplesResult)
-	}
-	if len(burndownResults.GlobalHistory) == 0 {
-		return
+	if !disableStatus {
+		fmt.Fprint(os.Stderr, "writing...\r")
 	}
 	begin := commits[0].Author.When.Unix()
 	end := commits[len(commits)-1].Author.When.Unix()
 	if !protobuf {
-		printResults(uri, begin, end, granularity, sampling,
-			withFiles, withPeople, withCouples,
-			burndownResults, couplesResult, id_matcher.ReversePeopleDict)
+		printResults(uri, begin, end, len(commits), deployed, results)
 	} else {
-		serializeResults(uri, begin, end, granularity, sampling,
-			withFiles, withPeople, withCouples,
-			burndownResults, couplesResult, id_matcher.ReversePeopleDict)
+		protobufResults(uri, begin, end, len(commits), deployed, results)
 	}
 }
 
 func printResults(
-	uri string, begin, end int64, granularity, sampling int,
-	withFiles, withPeople, withCouples bool,
-	burndownResults hercules.BurndownResult,
-	couplesResult hercules.CouplesResult,
-	reversePeopleDict []string) {
+	uri string, begin, end int64, commitsCount int, deployed []hercules.PipelineItem,
+	results map[hercules.PipelineItem]interface{}) {
+	fmt.Println("hercules:")
+	fmt.Println("  version: 3")
+	fmt.Println("  hash:", hercules.GIT_HASH)
+	fmt.Println("  repository:", uri)
+	fmt.Println("  begin_unix_time:", begin)
+	fmt.Println("  end_unix_time:", end)
+	fmt.Println("  commits:", commitsCount)
 
-	fmt.Println("burndown:")
-	fmt.Println("  version: 1")
-	fmt.Println("  begin:", begin)
-	fmt.Println("  end:", end)
-	fmt.Println("  granularity:", granularity)
-	fmt.Println("  sampling:", sampling)
-	fmt.Println("project:")
-	stdout.PrintMatrix(burndownResults.GlobalHistory, uri, true)
-	if withFiles {
-		fmt.Println("files:")
-		keys := sortedKeys(burndownResults.FileHistories)
-		for _, key := range keys {
-			stdout.PrintMatrix(burndownResults.FileHistories[key], key, true)
-		}
-	}
-	if withPeople {
-		fmt.Println("people_sequence:")
-		for key := range burndownResults.PeopleHistories {
-			fmt.Println("  - " + stdout.SafeString(reversePeopleDict[key]))
-		}
-		fmt.Println("people:")
-		for key, val := range burndownResults.PeopleHistories {
-			stdout.PrintMatrix(val, reversePeopleDict[key], true)
+	for _, item := range deployed {
+		result := results[item]
+		fmt.Printf("%s:\n", item.Name())
+		err := interface{}(item).(hercules.LeafPipelineItem).Serialize(result, false, os.Stdout)
+		if err != nil {
+			panic(err)
 		}
-		fmt.Println("people_interaction: |-")
-		stdout.PrintMatrix(burndownResults.PeopleMatrix, "", false)
-	}
-	if withCouples {
-		stdout.PrintCouples(&couplesResult, reversePeopleDict)
 	}
 }
 
-func serializeResults(
-	uri string, begin, end int64, granularity, sampling int,
-	withFiles, withPeople, withCouples bool,
-	burndownResults hercules.BurndownResult,
-	couplesResult hercules.CouplesResult,
-	reversePeopleDict []string) {
+func protobufResults(
+	uri string, begin, end int64, commitsCount int, deployed []hercules.PipelineItem,
+	results map[hercules.PipelineItem]interface{}) {
 
   header := pb.Metadata{
 	  Version: 1,
-	  Cmdline: strings.Join(os.Args, " "),
+	  Hash: hercules.GIT_HASH,
 	  Repository: uri,
     BeginUnixTime: begin,
 	  EndUnixTime: end,
-	  Granularity: int32(granularity),
-	  Sampling: int32(sampling),
+	  Commits: int32(commitsCount),
   }
 
 	message := pb.AnalysisResults{
 		Header: &header,
-		BurndownProject: pb.ToBurndownSparseMatrix(burndownResults.GlobalHistory, uri),
+		Contents: map[string][]byte{},
 	}
 
-	if withFiles {
-		message.BurndownFiles = make([]*pb.BurndownSparseMatrix, len(burndownResults.FileHistories))
-		keys := sortedKeys(burndownResults.FileHistories)
-		i := 0
-		for _, key := range keys {
-			message.BurndownFiles[i] = pb.ToBurndownSparseMatrix(
-				burndownResults.FileHistories[key], key)
-			i++
-		}
-	}
-
-	if withPeople {
-		message.BurndownDevelopers = make(
-		  []*pb.BurndownSparseMatrix, len(burndownResults.PeopleHistories))
-		for key, val := range burndownResults.PeopleHistories {
-			message.BurndownDevelopers[key] = pb.ToBurndownSparseMatrix(val, reversePeopleDict[key])
-		}
-		message.DevelopersInteraction = pb.DenseToCompressedSparseRowMatrix(
-			burndownResults.PeopleMatrix)
-	}
-
-	if withCouples {
-		message.FileCouples = &pb.Couples{
-			Index: couplesResult.Files,
-			Matrix: pb.MapToCompressedSparseRowMatrix(couplesResult.FilesMatrix),
-		}
-		message.DeveloperCouples = &pb.Couples{
-			Index: reversePeopleDict,
-			Matrix: pb.MapToCompressedSparseRowMatrix(couplesResult.PeopleMatrix),
-		}
-		message.TouchedFiles = &pb.DeveloperTouchedFiles{
-      Developers: make([]*pb.TouchedFiles, len(reversePeopleDict)),
-		}
-		for key := range reversePeopleDict {
-			files := couplesResult.PeopleFiles[key]
-			int32Files := make([]int32, len(files))
-			for i, f := range files {
-				int32Files[i] = int32(f)
-			}
-			message.TouchedFiles.Developers[key] = &pb.TouchedFiles{
-				Files: int32Files,
-			}
+	for _, item := range deployed {
+		result := results[item]
+		buffer := &bytes.Buffer{}
+		err := interface{}(item).(hercules.LeafPipelineItem).Serialize(result, true, buffer)
+		if err != nil {
+			panic(err)
 		}
+		message.Contents[item.Name()] = buffer.Bytes()
 	}
 
 	serialized, err := proto.Marshal(&message)

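`loadPlugins` runs before the regular `flag.Parse`: a throwaway `FlagSet` with `ContinueOnError` and discarded output scans `os.Args` for `-plugin`, so that `plugin.Open` can execute each shared object's `init()` (and thus its `Registry.Register` call) before the registry builds the analysis flags; the second `flag.Var` registration merely keeps `-plugin` listed in `-help`. A standalone sketch of the same pre-parse trick, with hypothetical names:

```go
package main

import (
	"flag"
	"fmt"
	"io/ioutil"
	"os"
	"plugin"
	"strings"
)

type pathSet map[string]bool

func (ps *pathSet) String() string {
	list := []string{}
	for p := range *ps {
		list = append(list, p)
	}
	return strings.Join(list, ", ")
}

func (ps *pathSet) Set(value string) error {
	(*ps)[value] = true
	return nil
}

func preloadPlugins() {
	paths := pathSet{}
	// ContinueOnError plus discarded output: parsing stops quietly at the
	// first flag this set does not know about, so errors are ignored; only
	// the -plugin values seen before that point are collected.
	fs := flag.NewFlagSet(os.Args[0], flag.ContinueOnError)
	fs.SetOutput(ioutil.Discard)
	fs.Var(&paths, "plugin", "Load the specified plugin. May be repeated.")
	fs.Parse(os.Args[1:])
	for path := range paths {
		// Opening a Go plugin runs its init() functions, where an analysis
		// is expected to register itself.
		if _, err := plugin.Open(path); err != nil {
			fmt.Fprintf(os.Stderr, "failed to load plugin from %s: %v\n", path, err)
		}
	}
}

func main() {
	preloadPlugins()
	// ... register the remaining flags and call flag.Parse() as usual.
}
```
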
+ 168 - 9
couples.go

@@ -1,14 +1,19 @@
 package hercules
 
 import (
+	"fmt"
+	"io"
 	"sort"
 
+	"github.com/gogo/protobuf/proto"
 	"gopkg.in/src-d/go-git.v4"
 	"gopkg.in/src-d/go-git.v4/plumbing/object"
 	"gopkg.in/src-d/go-git.v4/utils/merkletrie"
+	"gopkg.in/src-d/hercules.v3/pb"
+	"gopkg.in/src-d/hercules.v3/stdout"
 )
 
-type Couples struct {
+type CouplesAnalysis struct {
 	// The number of developers for which to build the matrix. 0 disables this analysis.
 	PeopleNumber int
 
@@ -18,6 +23,8 @@ type Couples struct {
 	people_commits []int
 	// files store every file occurred in the same commit with every other file.
 	files map[string]map[string]int
+	// references IdentityDetector.ReversedPeopleDict
+	reversedPeopleDict []string
 }
 
 type CouplesResult struct {
@@ -27,20 +34,35 @@ type CouplesResult struct {
 	Files        []string
 }
 
-func (couples *Couples) Name() string {
+func (couples *CouplesAnalysis) Name() string {
 	return "Couples"
 }
 
-func (couples *Couples) Provides() []string {
+func (couples *CouplesAnalysis) Provides() []string {
 	return []string{}
 }
 
-func (couples *Couples) Requires() []string {
-	arr := [...]string{"author", "renamed_changes"}
+func (couples *CouplesAnalysis) Requires() []string {
+	arr := [...]string{"author", "changes"}
 	return arr[:]
 }
 
-func (couples *Couples) Initialize(repository *git.Repository) {
+func (couples *CouplesAnalysis) ListConfigurationOptions() []ConfigurationOption {
+	return []ConfigurationOption{}
+}
+
+func (couples *CouplesAnalysis) Configure(facts map[string]interface{}) {
+	if val, exists := facts[FactIdentityDetectorPeopleCount].(int); exists {
+		couples.PeopleNumber = val
+		couples.reversedPeopleDict = facts[FactIdentityDetectorReversedPeopleDict].([]string)
+	}
+}
+
+func (couples *CouplesAnalysis) Flag() string {
+	return "couples"
+}
+
+func (couples *CouplesAnalysis) Initialize(repository *git.Repository) {
 	couples.people = make([]map[string]int, couples.PeopleNumber+1)
 	for i := range couples.people {
 		couples.people[i] = map[string]int{}
@@ -49,13 +71,13 @@ func (couples *Couples) Initialize(repository *git.Repository) {
 	couples.files = map[string]map[string]int{}
 }
 
-func (couples *Couples) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
+func (couples *CouplesAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
 	author := deps["author"].(int)
 	if author == MISSING_AUTHOR {
 		author = couples.PeopleNumber
 	}
 	couples.people_commits[author] += 1
-	tree_diff := deps["renamed_changes"].(object.Changes)
+	tree_diff := deps["changes"].(object.Changes)
 	context := make([]string, 0)
 	deleteFile := func(name string) {
 		// we do not remove the file from people - the context does not expire
@@ -114,7 +136,7 @@ func (couples *Couples) Consume(deps map[string]interface{}) (map[string]interfa
 	return nil, nil
 }
 
-func (couples *Couples) Finalize() interface{} {
+func (couples *CouplesAnalysis) Finalize() interface{} {
 	filesSequence := make([]string, len(couples.files))
 	i := 0
 	for file := range couples.files {
@@ -161,3 +183,140 @@ func (couples *Couples) Finalize() interface{} {
 		PeopleMatrix: peopleMatrix, PeopleFiles: peopleFiles,
 		Files: filesSequence, FilesMatrix: filesMatrix}
 }
+
+func (couples *CouplesAnalysis) Serialize(result interface{}, binary bool, writer io.Writer) error {
+	couplesResult := result.(CouplesResult)
+	if binary {
+		return couples.serializeBinary(&couplesResult, writer)
+	}
+	couples.serializeText(&couplesResult, writer)
+	return nil
+}
+
+func (couples *CouplesAnalysis) serializeText(result *CouplesResult, writer io.Writer) {
+	fmt.Fprintln(writer, "  files_coocc:")
+	fmt.Fprintln(writer, "    index:")
+	for _, file := range result.Files {
+		fmt.Fprintf(writer, "      - %s\n", stdout.SafeString(file))
+	}
+
+	fmt.Fprintln(writer, "    matrix:")
+	for _, files := range result.FilesMatrix {
+		fmt.Fprint(writer, "      - {")
+		indices := []int{}
+		for file := range files {
+			indices = append(indices, file)
+		}
+		sort.Ints(indices)
+		for i, file := range indices {
+			fmt.Fprintf(writer, "%d: %d", file, files[file])
+			if i < len(indices)-1 {
+				fmt.Fprint(writer, ", ")
+			}
+		}
+		fmt.Fprintln(writer, "}")
+	}
+
+	fmt.Fprintln(writer, "  people_coocc:")
+	fmt.Fprintln(writer, "    index:")
+	for _, person := range couples.reversedPeopleDict {
+		fmt.Fprintf(writer, "      - %s\n", stdout.SafeString(person))
+	}
+
+	fmt.Fprintln(writer, "    matrix:")
+	for _, people := range result.PeopleMatrix {
+		fmt.Fprint(writer, "      - {")
+		indices := []int{}
+		for file := range people {
+			indices = append(indices, file)
+		}
+		sort.Ints(indices)
+		for i, person := range indices {
+			fmt.Fprintf(writer, "%d: %d", person, people[person])
+			if i < len(indices)-1 {
+				fmt.Fprint(writer, ", ")
+			}
+		}
+		fmt.Fprintln(writer, "}")
+	}
+
+	fmt.Fprintln(writer, "    author_files:") // sorted by number of files each author changed
+	peopleFiles := sortByNumberOfFiles(result.PeopleFiles, couples.reversedPeopleDict, result.Files)
+	for _, authorFiles := range peopleFiles {
+		fmt.Fprintf(writer, "      - %s:\n", stdout.SafeString(authorFiles.Author))
+		sort.Strings(authorFiles.Files)
+		for _, file := range authorFiles.Files {
+			fmt.Fprintf(writer, "        - %s\n", stdout.SafeString(file)) // sorted by path
+		}
+	}
+}
+
+func sortByNumberOfFiles(
+	peopleFiles [][]int, peopleDict []string, filesDict []string) authorFilesList {
+	var pfl authorFilesList
+	for peopleIdx, files := range peopleFiles {
+		if peopleIdx < len(peopleDict) {
+			fileNames := make([]string, len(files))
+			for i, fi := range files {
+				fileNames[i] = filesDict[fi]
+			}
+			pfl = append(pfl, authorFiles{peopleDict[peopleIdx], fileNames})
+		}
+	}
+	sort.Sort(pfl)
+	return pfl
+}
+
+type authorFiles struct {
+	Author string
+	Files  []string
+}
+
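+// authorFilesList implements sort.Interface, ordering authors by ascending number of touched files.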
+type authorFilesList []authorFiles
+
+func (s authorFilesList) Len() int {
+	return len(s)
+}
+func (s authorFilesList) Swap(i, j int) {
+	s[i], s[j] = s[j], s[i]
+}
+func (s authorFilesList) Less(i, j int) bool {
+	return len(s[i].Files) < len(s[j].Files)
+}
+
+func (couples *CouplesAnalysis) serializeBinary(result *CouplesResult, writer io.Writer) error {
+	message := pb.CouplesAnalysisResults{}
+
+	message.FileCouples = &pb.Couples{
+		Index:  result.Files,
+		Matrix: pb.MapToCompressedSparseRowMatrix(result.FilesMatrix),
+	}
+	message.DeveloperCouples = &pb.Couples{
+		Index:  couples.reversedPeopleDict,
+		Matrix: pb.MapToCompressedSparseRowMatrix(result.PeopleMatrix),
+	}
+	message.TouchedFiles = &pb.DeveloperTouchedFiles{
+		Developers: make([]*pb.TouchedFiles, len(couples.reversedPeopleDict)),
+	}
+	for key := range couples.reversedPeopleDict {
+		files := result.PeopleFiles[key]
+		int32Files := make([]int32, len(files))
+		for i, f := range files {
+			int32Files[i] = int32(f)
+		}
+		message.TouchedFiles.Developers[key] = &pb.TouchedFiles{
+			Files: int32Files,
+		}
+	}
+
+	serialized, err := proto.Marshal(&message)
+	if err != nil {
+		return err
+	}
+	writer.Write(serialized)
+	return nil
+}
+
+func init() {
+	Registry.Register(&CouplesAnalysis{})
+}
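
Note: serializeBinary flattens each map-based co-occurrence matrix into the
Compressed Sparse Row triple (Data, Indices, Indptr) that the tests below assert
against. A minimal sketch of that conversion, using only the standard sort
package (toCSR is a hypothetical helper shown for illustration; the real work is
done by pb.MapToCompressedSparseRowMatrix):

	// toCSR flattens a row-indexed map matrix into CSR arrays.
	// Column indices are sorted per row so Indices stays ordered.
	func toCSR(matrix []map[int]int64) (data []int64, indices []int32, indptr []int64) {
		indptr = append(indptr, 0)
		for _, row := range matrix {
			cols := make([]int, 0, len(row))
			for col := range row {
				cols = append(cols, col)
			}
			sort.Ints(cols)
			for _, col := range cols {
				data = append(data, row[col])
				indices = append(indices, int32(col))
			}
			indptr = append(indptr, int64(len(data)))
		}
		return data, indices, indptr
	}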

+ 109 - 7
couples_test.go

@@ -1,15 +1,18 @@
 package hercules
 
 import (
+	"bytes"
 	"strings"
 	"testing"
 
+	"github.com/gogo/protobuf/proto"
 	"github.com/stretchr/testify/assert"
 	"gopkg.in/src-d/go-git.v4/plumbing/object"
+	"gopkg.in/src-d/hercules.v3/pb"
 )
 
-func fixtureCouples() *Couples {
-	c := Couples{PeopleNumber: 3}
+func fixtureCouples() *CouplesAnalysis {
+	c := CouplesAnalysis{PeopleNumber: 3}
 	c.Initialize(testRepository)
 	return &c
 }
@@ -20,7 +23,18 @@ func TestCouplesMeta(t *testing.T) {
 	assert.Equal(t, len(c.Provides()), 0)
 	assert.Equal(t, len(c.Requires()), 2)
 	assert.Equal(t, c.Requires()[0], "author")
-	assert.Equal(t, c.Requires()[1], "renamed_changes")
+	assert.Equal(t, c.Requires()[1], "changes")
+	assert.Equal(t, c.Flag(), "couples")
+	assert.Len(t, c.ListConfigurationOptions(), 0)
+}
+
+func TestCouplesRegistration(t *testing.T) {
+	tp, exists := Registry.registered[(&CouplesAnalysis{}).Name()]
+	assert.True(t, exists)
+	assert.Equal(t, tp.Elem().Name(), "CouplesAnalysis")
+	tp, exists = Registry.flags[(&CouplesAnalysis{}).Flag()]
+	assert.True(t, exists)
+	assert.Equal(t, tp.Elem().Name(), "CouplesAnalysis")
 }
 
 func generateChanges(names ...string) object.Changes {
@@ -63,15 +77,15 @@ func TestCouplesConsumeFinalize(t *testing.T) {
 	c := fixtureCouples()
 	deps := map[string]interface{}{}
 	deps["author"] = 0
-	deps["renamed_changes"] = generateChanges("+two", "+four", "+six")
+	deps["changes"] = generateChanges("+two", "+four", "+six")
 	c.Consume(deps)
-	deps["renamed_changes"] = generateChanges("+one", "-two", "=three", ">four>five")
+	deps["changes"] = generateChanges("+one", "-two", "=three", ">four>five")
 	c.Consume(deps)
 	deps["author"] = 1
-	deps["renamed_changes"] = generateChanges("=one", "=three", "-six")
+	deps["changes"] = generateChanges("=one", "=three", "-six")
 	c.Consume(deps)
 	deps["author"] = 2
-	deps["renamed_changes"] = generateChanges("=five")
+	deps["changes"] = generateChanges("=five")
 	c.Consume(deps)
 	assert.Equal(t, len(c.people[0]), 5)
 	assert.Equal(t, c.people[0]["one"], 1)
@@ -141,3 +155,91 @@ func TestCouplesConsumeFinalize(t *testing.T) {
 	assert.Equal(t, cr.FilesMatrix[2][1], int64(2))
 	assert.Equal(t, cr.FilesMatrix[2][2], int64(2))
 }
+
+func TestCouplesSerialize(t *testing.T) {
+	c := fixtureCouples()
+	c.PeopleNumber = 1
+	people := [...]string{"p1", "p2", "p3"}
+	facts := map[string]interface{}{}
+	c.Configure(facts)
+	assert.Equal(t, c.PeopleNumber, 1)
+	facts[FactIdentityDetectorPeopleCount] = 3
+	facts[FactIdentityDetectorReversedPeopleDict] = people[:]
+	c.Configure(facts)
+	assert.Equal(t, c.PeopleNumber, 3)
+	deps := map[string]interface{}{}
+	deps["author"] = 0
+	deps["changes"] = generateChanges("+two", "+four", "+six")
+	c.Consume(deps)
+	deps["changes"] = generateChanges("+one", "-two", "=three", ">four>five")
+	c.Consume(deps)
+	deps["author"] = 1
+	deps["changes"] = generateChanges("=one", "=three", "-six")
+	c.Consume(deps)
+	deps["author"] = 2
+	deps["changes"] = generateChanges("=five")
+	c.Consume(deps)
+	result := c.Finalize().(CouplesResult)
+	buffer := &bytes.Buffer{}
+	c.Serialize(result, false, buffer)
+	assert.Equal(t, buffer.String(), `  files_coocc:
+    index:
+      - "five"
+      - "one"
+      - "three"
+    matrix:
+      - {0: 3, 1: 1, 2: 1}
+      - {0: 1, 1: 2, 2: 2}
+      - {0: 1, 1: 2, 2: 2}
+  people_coocc:
+    index:
+      - "p1"
+      - "p2"
+      - "p3"
+    matrix:
+      - {0: 7, 1: 3, 2: 1}
+      - {0: 3, 1: 3}
+      - {0: 1, 2: 1}
+      - {}
+    author_files:
+      - "p3":
+        - "five"
+      - "p2":
+        - "one"
+        - "three"
+      - "p1":
+        - "five"
+        - "one"
+        - "three"
+`)
+	buffer = &bytes.Buffer{}
+	c.Serialize(result, true, buffer)
+	msg := pb.CouplesAnalysisResults{}
+	proto.Unmarshal(buffer.Bytes(), &msg)
+	assert.Len(t, msg.TouchedFiles.Developers, 3)
+	tmp1 := [...]int32{0, 1, 2}
+	assert.Equal(t, msg.TouchedFiles.Developers[0].Files, tmp1[:])
+	tmp2 := [...]int32{1, 2}
+	assert.Equal(t, msg.TouchedFiles.Developers[1].Files, tmp2[:])
+	tmp3 := [...]int32{0}
+	assert.Equal(t, msg.TouchedFiles.Developers[2].Files, tmp3[:])
+	assert.Equal(t, msg.DeveloperCouples.Index, people[:])
+	assert.Equal(t, msg.DeveloperCouples.Matrix.NumberOfRows, int32(4))
+	assert.Equal(t, msg.DeveloperCouples.Matrix.NumberOfColumns, int32(4))
+	data := [...]int64{7, 3, 1, 3, 3, 1, 1}
+	assert.Equal(t, msg.DeveloperCouples.Matrix.Data, data[:])
+	indices := [...]int32{0, 1, 2, 0, 1, 0, 2}
+	assert.Equal(t, msg.DeveloperCouples.Matrix.Indices, indices[:])
+	indptr := [...]int64{0, 3, 5, 7, 7}
+	assert.Equal(t, msg.DeveloperCouples.Matrix.Indptr, indptr[:])
+	files := [...]string{"five", "one", "three"}
+	assert.Equal(t, msg.FileCouples.Index, files[:])
+	assert.Equal(t, msg.FileCouples.Matrix.NumberOfRows, int32(3))
+	assert.Equal(t, msg.FileCouples.Matrix.NumberOfColumns, int32(3))
+	data2 := [...]int64{3, 1, 1, 1, 2, 2, 1, 2, 2}
+	assert.Equal(t, msg.FileCouples.Matrix.Data, data2[:])
+	indices2 := [...]int32{0, 1, 2, 0, 1, 2, 0, 1, 2}
+	assert.Equal(t, msg.FileCouples.Matrix.Indices, indices2[:])
+	indptr2 := [...]int64{0, 3, 6, 9}
+	assert.Equal(t, msg.FileCouples.Matrix.Indptr, indptr2[:])
+}

+ 8 - 2
day.go

@@ -25,6 +25,12 @@ func (days *DaysSinceStart) Requires() []string {
 	return []string{}
 }
 
+func (days *DaysSinceStart) ListConfigurationOptions() []ConfigurationOption {
+	return []ConfigurationOption{}
+}
+
+func (days *DaysSinceStart) Configure(facts map[string]interface{}) {}
+
 func (days *DaysSinceStart) Initialize(repository *git.Repository) {
 	days.day0 = time.Time{}
 	days.previousDay = 0
@@ -46,6 +52,6 @@ func (days *DaysSinceStart) Consume(deps map[string]interface{}) (map[string]int
 	return map[string]interface{}{"day": day}, nil
 }
 
-func (days *DaysSinceStart) Finalize() interface{} {
-	return nil
+func init() {
+	Registry.Register(&DaysSinceStart{})
 }

+ 10 - 4
day_test.go

@@ -19,12 +19,18 @@ func TestDaysSinceStartMeta(t *testing.T) {
 	assert.Equal(t, len(dss.Provides()), 1)
 	assert.Equal(t, dss.Provides()[0], "day")
 	assert.Equal(t, len(dss.Requires()), 0)
+	assert.Len(t, dss.ListConfigurationOptions(), 0)
+	dss.Configure(nil)
 }
 
-func TestDaysSinceStartFinalize(t *testing.T) {
-	dss := fixtureDaysSinceStart()
-	r := dss.Finalize()
-	assert.Nil(t, r)
+func TestDaysSinceStartRegistration(t *testing.T) {
+	tp, exists := Registry.registered[(&DaysSinceStart{}).Name()]
+	assert.True(t, exists)
+	assert.Equal(t, tp.Elem().Name(), "DaysSinceStart")
+	tps, exists := Registry.provided[(&DaysSinceStart{}).Provides()[0]]
+	assert.True(t, exists)
+	assert.Len(t, tps, 1)
+	assert.Equal(t, tps[0].Elem().Name(), "DaysSinceStart")
 }
 
 func TestDaysSinceStartConsume(t *testing.T) {

+ 12 - 7
diff.go

@@ -31,17 +31,22 @@ func (diff *FileDiff) Provides() []string {
 }
 
 func (diff *FileDiff) Requires() []string {
-	arr := [...]string{"renamed_changes", "blob_cache"}
+	arr := [...]string{"changes", "blob_cache"}
 	return arr[:]
 }
 
-func (diff *FileDiff) Initialize(repository *git.Repository) {
+func (diff *FileDiff) ListConfigurationOptions() []ConfigurationOption {
+	return []ConfigurationOption{}
 }
 
+func (diff *FileDiff) Configure(facts map[string]interface{}) {}
+
+func (diff *FileDiff) Initialize(repository *git.Repository) {}
+
 func (diff *FileDiff) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
 	result := map[string]FileDiffData{}
 	cache := deps["blob_cache"].(map[plumbing.Hash]*object.Blob)
-	tree_diff := deps["renamed_changes"].(object.Changes)
+	tree_diff := deps["changes"].(object.Changes)
 	for _, change := range tree_diff {
 		action, err := change.Action()
 		if err != nil {
@@ -76,10 +81,6 @@ func (diff *FileDiff) Consume(deps map[string]interface{}) (map[string]interface
 	return map[string]interface{}{"file_diff": result}, nil
 }
 
-func (diff *FileDiff) Finalize() interface{} {
-	return nil
-}
-
 func blobToString(file *object.Blob) (string, error) {
 	if file == nil {
 		return "", errors.New("Blob not cached.")
@@ -93,3 +94,7 @@ func blobToString(file *object.Blob) (string, error) {
 	buf.ReadFrom(reader)
 	return buf.String(), nil
 }
+
+func init() {
+	Registry.Register(&FileDiff{})
+}

+ 57 - 0
diff_refiner.go

@@ -0,0 +1,57 @@
+package hercules
+
+import (
+	"gopkg.in/src-d/go-git.v4"
+)
+
+type FileDiffRefiner struct {
+}
+
+func (ref *FileDiffRefiner) Name() string {
+	return "FileDiffRefiner"
+}
+
+func (ref *FileDiffRefiner) Provides() []string {
+	arr := [...]string{"file_diff"}
+	return arr[:]
+}
+
+func (ref *FileDiffRefiner) Requires() []string {
+	arr := [...]string{"file_diff", "changed_uasts"}
+	return arr[:]
+}
+
+func (ref *FileDiffRefiner) Features() []string {
+	arr := [...]string{"uast"}
+	return arr[:]
+}
+
+func (ref *FileDiffRefiner) ListConfigurationOptions() []ConfigurationOption {
+	return []ConfigurationOption{}
+}
+
+func (ref *FileDiffRefiner) Configure(facts map[string]interface{}) {}
+
+func (ref *FileDiffRefiner) Initialize(repository *git.Repository) {
+}
+
+func (ref *FileDiffRefiner) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
+	changesList := deps["changed_uasts"].([]UASTChange)
+	changes := map[string]UASTChange{}
+	for _, change := range changesList {
+		if change.Before != nil && change.After != nil {
+			changes[change.Change.To.Name] = change
+		}
+	}
+	diffs := deps["file_diff"].(map[string]FileDiffData)
+	for fileName := range diffs {
+		_ /*change*/ = changes[fileName]
+		// TODO: refine the diff for this file line by line using the matched UAST change
+	}
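+	// Placeholder until the TODO above is implemented: an empty result map is emitted.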
+	result := map[string]FileDiffData{}
+	return map[string]interface{}{"file_diff": result}, nil
+}
+
+func init() {
+	Registry.Register(&FileDiffRefiner{})
+}

+ 17 - 7
diff_test.go

@@ -22,14 +22,24 @@ func TestFileDiffMeta(t *testing.T) {
 	assert.Equal(t, len(fd.Provides()), 1)
 	assert.Equal(t, fd.Provides()[0], "file_diff")
 	assert.Equal(t, len(fd.Requires()), 2)
-	assert.Equal(t, fd.Requires()[0], "renamed_changes")
+	assert.Equal(t, fd.Requires()[0], "changes")
 	assert.Equal(t, fd.Requires()[1], "blob_cache")
+	assert.Len(t, fd.ListConfigurationOptions(), 0)
+	fd.Configure(nil)
 }
 
-func TestFileDiffFinalize(t *testing.T) {
-	fd := fixtureFileDiff()
-	r := fd.Finalize()
-	assert.Nil(t, r)
+func TestFileDiffRegistration(t *testing.T) {
+	tp, exists := Registry.registered[(&FileDiff{}).Name()]
+	assert.True(t, exists)
+	assert.Equal(t, tp.Elem().Name(), "FileDiff")
+	tps, exists := Registry.provided[(&FileDiff{}).Provides()[0]]
+	assert.True(t, exists)
+	assert.True(t, len(tps) >= 1)
+	matched := false
+	for _, tp := range tps {
+		matched = matched || tp.Elem().Name() == "FileDiff"
+	}
+	assert.True(t, matched)
 }
 
 func TestFileDiffConsume(t *testing.T) {
@@ -85,7 +95,7 @@ func TestFileDiffConsume(t *testing.T) {
 		},
 	}, To: object.ChangeEntry{},
 	}
-	deps["renamed_changes"] = changes
+	deps["changes"] = changes
 	res, err := fd.Consume(deps)
 	assert.Nil(t, err)
 	diffs := res["file_diff"].(map[string]FileDiffData)
@@ -142,7 +152,7 @@ func TestFileDiffConsumeInvalidBlob(t *testing.T) {
 			Hash: plumbing.NewHash("334cde09da4afcb74f8d2b3e6fd6cce61228b485"),
 		},
 	}}
-	deps["renamed_changes"] = changes
+	deps["changes"] = changes
 	res, err := fd.Consume(deps)
 	assert.Nil(t, res)
 	assert.NotNil(t, err)

+ 9 - 19
doc.go

@@ -6,10 +6,10 @@ The analysis is expressed in a form of the tree: there are nodes - "pipeline ite
 require some other nodes to be executed before themselves and in turn provide the data for
 dependent nodes. There are several service items which do not produce any useful
 statistics but rather provide the requirements for other items. The top-level items
-are:
+include:
 
 - BurndownAnalysis - line burndown statistics for project, files and developers.
-- Couples - coupling statistics for files and developers.
+- CouplesAnalysis - coupling statistics for files and developers.
 
 The typical API usage is to initialize the Pipeline class:
 
@@ -19,31 +19,21 @@ The typical API usage is to initialize the Pipeline class:
 	// ...initialize repository...
 	pipeline := hercules.NewPipeline(repository)
 
-Then add the required analysis tree nodes:
+Then add the required analysis:
 
-  pipeline.AddItem(&hercules.BlobCache{})
-	pipeline.AddItem(&hercules.DaysSinceStart{})
-	pipeline.AddItem(&hercules.TreeDiff{})
-	pipeline.AddItem(&hercules.FileDiff{})
-	pipeline.AddItem(&hercules.RenameAnalysis{SimilarityThreshold: 80})
-	pipeline.AddItem(&hercules.IdentityDetector{})
-
-Then initialize BurndownAnalysis:
-
-  burndowner := &hercules.BurndownAnalysis{
+  ba := pipeline.DeployItem(&hercules.BurndownAnalysis{
     Granularity:  30,
    Sampling:     30,
-  }
-  pipeline.AddItem(burndowner)
+  })
 
-Then execute the analysis tree:
+This call will add all the needed intermediate pipeline items. Then link and execute the analysis tree:
 
-  pipeline.Initialize()
-	result, err := pipeline.Run(commits)
+  pipeline.Initialize(nil)
+  result, err := pipeline.Run(pipeline.Commits())
 
 Finally extract the result:
 
-  burndownResults := result[burndowner].(hercules.BurndownResult)
+  burndownResult := result[ba].(hercules.BurndownResult)
 
 The actual usage example is cmd/hercules/main.go - the command line tool's code.
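
For convenience, the fragments above assemble into one end-to-end sketch (error
handling elided; NewPipeline, DeployItem, Initialize, Commits and Run are the
calls documented above, git.PlainOpen comes from go-git):

	repository, _ := git.PlainOpen("/path/to/repo")
	pipeline := hercules.NewPipeline(repository)
	ba := pipeline.DeployItem(&hercules.BurndownAnalysis{Granularity: 30, Sampling: 30})
	pipeline.Initialize(nil)
	results, err := pipeline.Run(pipeline.Commits())
	if err == nil {
		burndownResult := results[ba].(hercules.BurndownResult)
		_ = burndownResult // plot or serialize it
	}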
 

+ 27 - 0
doc/dag.dot

@@ -0,0 +1,27 @@
+digraph Hercules {
+  "6 BlobCache" -> "7 [blob_cache]"
+  "0 DaysSinceStart" -> "3 [day]"
+  "10 FileDiff" -> "12 [file_diff]"
+  "16 FileDiffRefiner" -> "17 Burndown"
+  "1 IdentityDetector" -> "4 [author]"
+  "8 RenameAnalysis" -> "17 Burndown"
+  "8 RenameAnalysis" -> "9 Couples"
+  "8 RenameAnalysis" -> "10 FileDiff"
+  "8 RenameAnalysis" -> "11 UAST"
+  "8 RenameAnalysis" -> "14 UASTChanges"
+  "2 TreeDiff" -> "5 [changes]"
+  "11 UAST" -> "13 [uasts]"
+  "14 UASTChanges" -> "15 [changed_uasts]"
+  "4 [author]" -> "17 Burndown"
+  "4 [author]" -> "9 Couples"
+  "7 [blob_cache]" -> "17 Burndown"
+  "7 [blob_cache]" -> "10 FileDiff"
+  "7 [blob_cache]" -> "8 RenameAnalysis"
+  "7 [blob_cache]" -> "11 UAST"
+  "15 [changed_uasts]" -> "16 FileDiffRefiner"
+  "5 [changes]" -> "6 BlobCache"
+  "5 [changes]" -> "8 RenameAnalysis"
+  "3 [day]" -> "17 Burndown"
+  "12 [file_diff]" -> "16 FileDiffRefiner"
+  "13 [uasts]" -> "14 UASTChanges"
+}
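
The doc/dag.png binary below is the rendered form of this DOT source; the
standard Graphviz invocation to regenerate it would be:

	dot -Tpng doc/dag.dot -o doc/dag.png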

BIN
doc/dag.png


+ 1 - 1
file.go

@@ -2,7 +2,7 @@ package hercules
 
 import (
 	"fmt"
-	"gopkg.in/src-d/hercules.v2/rbtree"
+	"gopkg.in/src-d/hercules.v3/rbtree"
 )
 
 // A status is the something we would like to update during File.Update().

+ 1 - 1
file_test.go

@@ -4,7 +4,7 @@ import (
 	"testing"
 
 	"github.com/stretchr/testify/assert"
-	"gopkg.in/src-d/hercules.v2/rbtree"
+	"gopkg.in/src-d/hercules.v3/rbtree"
 )
 
 func updateStatusFile(

+ 55 - 10
identity.go

@@ -14,11 +14,19 @@ type IdentityDetector struct {
 	// Maps email || name  -> developer id.
 	PeopleDict map[string]int
 	// Maps developer id -> description
-	ReversePeopleDict []string
+	ReversedPeopleDict []string
 }
 
-const MISSING_AUTHOR = (1 << 18) - 1
-const SELF_AUTHOR = (1 << 18) - 2
+const (
+	MISSING_AUTHOR   = (1 << 18) - 1
+	SELF_AUTHOR      = (1 << 18) - 2
+	UNMATCHED_AUTHOR = "<unmatched>"
+
+	FactIdentityDetectorPeopleDict         = "IdentityDetector.PeopleDict"
+	FactIdentityDetectorReversedPeopleDict = "IdentityDetector.ReversedPeopleDict"
+	ConfigIdentityDetectorPeopleDictPath   = "IdentityDetector.PeopleDictPath"
+	FactIdentityDetectorPeopleCount        = "IdentityDetector.PeopleCount"
+)
 
 func (id *IdentityDetector) Name() string {
 	return "IdentityDetector"
@@ -33,6 +41,43 @@ func (id *IdentityDetector) Requires() []string {
 	return []string{}
 }
 
+func (id *IdentityDetector) ListConfigurationOptions() []ConfigurationOption {
+	options := [...]ConfigurationOption{{
+		Name:        ConfigIdentityDetectorPeopleDictPath,
+		Description: "Path to the developers' email associations.",
+		Flag:        "people-dict",
+		Type:        StringConfigurationOption,
+		Default:     ""},
+	}
+	return options[:]
+}
+
+func (id *IdentityDetector) Configure(facts map[string]interface{}) {
+	if val, exists := facts[FactIdentityDetectorPeopleDict].(map[string]int); exists {
+		id.PeopleDict = val
+	}
+	if val, exists := facts[FactIdentityDetectorReversedPeopleDict].([]string); exists {
+		id.ReversedPeopleDict = val
+	}
+	if id.PeopleDict == nil || id.ReversedPeopleDict == nil {
+		peopleDictPath, _ := facts[ConfigIdentityDetectorPeopleDictPath].(string)
+		if peopleDictPath != "" {
+			id.LoadPeopleDict(peopleDictPath)
+			facts[FactIdentityDetectorPeopleCount] = len(id.ReversedPeopleDict) - 1
+		} else {
+			if _, exists := facts[FactPipelineCommits]; !exists {
+				panic("IdentityDetector needs a list of commits to initialize.")
+			}
+			id.GeneratePeopleDict(facts[FactPipelineCommits].([]*object.Commit))
+			facts[FactIdentityDetectorPeopleCount] = len(id.ReversedPeopleDict)
+		}
+	} else {
+		facts[FactIdentityDetectorPeopleCount] = len(id.ReversedPeopleDict)
+	}
+	facts[FactIdentityDetectorPeopleDict] = id.PeopleDict
+	facts[FactIdentityDetectorReversedPeopleDict] = id.ReversedPeopleDict
+}
+
 func (id *IdentityDetector) Initialize(repository *git.Repository) {
 }
 
@@ -49,10 +94,6 @@ func (self *IdentityDetector) Consume(deps map[string]interface{}) (map[string]i
 	return map[string]interface{}{"author": id}, nil
 }
 
-func (id *IdentityDetector) Finalize() interface{} {
-	return nil
-}
-
 func (id *IdentityDetector) LoadPeopleDict(path string) error {
 	file, err := os.Open(path)
 	if err != nil {
@@ -71,9 +112,9 @@ func (id *IdentityDetector) LoadPeopleDict(path string) error {
 		reverse_dict = append(reverse_dict, ids[0])
 		size += 1
 	}
-	reverse_dict = append(reverse_dict, "<unmatched>")
+	reverse_dict = append(reverse_dict, UNMATCHED_AUTHOR)
 	id.PeopleDict = dict
-	id.ReversePeopleDict = reverse_dict
+	id.ReversedPeopleDict = reverse_dict
 	return nil
 }
 
@@ -169,5 +210,9 @@ func (id *IdentityDetector) GeneratePeopleDict(commits []*object.Commit) {
 		reverse_dict[val] = strings.Join(names[val], "|") + "|" + strings.Join(emails[val], "|")
 	}
 	id.PeopleDict = dict
-	id.ReversePeopleDict = reverse_dict
+	id.ReversedPeopleDict = reverse_dict
+}
+
+func init() {
+	Registry.Register(&IdentityDetector{})
 }
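
Note: LoadPeopleDict (reachable through the new people-dict option) reads one
developer per line, the canonical name first, followed by |-separated aliases
and emails, as the tests below exercise:

	Egor|egor@sourced.tech
	Vadim|vadim@sourced.tech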

+ 104 - 19
identity_test.go

@@ -23,8 +23,8 @@ func fixtureIdentityDetector() *IdentityDetector {
 	reversePeopleDict := make([]string, 1)
 	reversePeopleDict[0] = "Vadim"
 	id := IdentityDetector{
-		PeopleDict:        peopleDict,
-		ReversePeopleDict: reversePeopleDict,
+		PeopleDict:         peopleDict,
+		ReversedPeopleDict: reversePeopleDict,
 	}
 	id.Initialize(testRepository)
 	return &id
@@ -36,6 +36,96 @@ func TestIdentityDetectorMeta(t *testing.T) {
 	assert.Equal(t, len(id.Requires()), 0)
 	assert.Equal(t, len(id.Provides()), 1)
 	assert.Equal(t, id.Provides()[0], "author")
+	opts := id.ListConfigurationOptions()
+	assert.Len(t, opts, 1)
+	assert.Equal(t, opts[0].Name, ConfigIdentityDetectorPeopleDictPath)
+}
+
+func TestIdentityDetectorConfigure(t *testing.T) {
+	id := fixtureIdentityDetector()
+	facts := map[string]interface{}{}
+	m1 := map[string]int{}
+	m2 := []string{}
+	facts[FactIdentityDetectorPeopleDict] = m1
+	facts[FactIdentityDetectorReversedPeopleDict] = m2
+	id.Configure(facts)
+	assert.Equal(t, m1, facts[FactIdentityDetectorPeopleDict])
+	assert.Equal(t, m2, facts[FactIdentityDetectorReversedPeopleDict])
+	assert.Equal(t, id.PeopleDict, facts[FactIdentityDetectorPeopleDict])
+	assert.Equal(t, id.ReversedPeopleDict, facts[FactIdentityDetectorReversedPeopleDict])
+	id = fixtureIdentityDetector()
+	tmpf, err := ioutil.TempFile("", "hercules-test-")
+	assert.Nil(t, err)
+	defer os.Remove(tmpf.Name())
+	_, err = tmpf.WriteString(`Egor|egor@sourced.tech
+Vadim|vadim@sourced.tech`)
+	assert.Nil(t, err)
+	assert.Nil(t, tmpf.Close())
+	delete(facts, FactIdentityDetectorPeopleDict)
+	delete(facts, FactIdentityDetectorReversedPeopleDict)
+	facts[ConfigIdentityDetectorPeopleDictPath] = tmpf.Name()
+	id.Configure(facts)
+	assert.Len(t, id.PeopleDict, 2)
+	assert.Len(t, id.ReversedPeopleDict, 1)
+	assert.Equal(t, id.ReversedPeopleDict[0], "Vadim")
+	delete(facts, FactIdentityDetectorPeopleDict)
+	delete(facts, FactIdentityDetectorReversedPeopleDict)
+	id = fixtureIdentityDetector()
+	id.PeopleDict = nil
+	id.Configure(facts)
+	assert.Equal(t, id.PeopleDict, facts[FactIdentityDetectorPeopleDict])
+	assert.Equal(t, id.ReversedPeopleDict, facts[FactIdentityDetectorReversedPeopleDict])
+	assert.Len(t, id.PeopleDict, 4)
+	assert.Len(t, id.ReversedPeopleDict, 3)
+	assert.Equal(t, id.ReversedPeopleDict[0], "Egor")
+	assert.Equal(t, facts[FactIdentityDetectorPeopleCount], 2)
+	delete(facts, FactIdentityDetectorPeopleDict)
+	delete(facts, FactIdentityDetectorReversedPeopleDict)
+	id = fixtureIdentityDetector()
+	id.ReversedPeopleDict = nil
+	id.Configure(facts)
+	assert.Equal(t, id.PeopleDict, facts[FactIdentityDetectorPeopleDict])
+	assert.Equal(t, id.ReversedPeopleDict, facts[FactIdentityDetectorReversedPeopleDict])
+	assert.Len(t, id.PeopleDict, 4)
+	assert.Len(t, id.ReversedPeopleDict, 3)
+	assert.Equal(t, id.ReversedPeopleDict[0], "Egor")
+	assert.Equal(t, facts[FactIdentityDetectorPeopleCount], 2)
+	delete(facts, FactIdentityDetectorPeopleDict)
+	delete(facts, FactIdentityDetectorReversedPeopleDict)
+	delete(facts, ConfigIdentityDetectorPeopleDictPath)
+	commits := make([]*object.Commit, 0)
+	iter, err := testRepository.CommitObjects()
+	assert.Nil(t, err)
+	commit, err := iter.Next()
+	for ; err != io.EOF; commit, err = iter.Next() {
+		if err != nil {
+			panic(err)
+		}
+		commits = append(commits, commit)
+	}
+	facts["commits"] = commits
+	id = fixtureIdentityDetector()
+	id.PeopleDict = nil
+	id.ReversedPeopleDict = nil
+	id.Configure(facts)
+	assert.Equal(t, id.PeopleDict, facts[FactIdentityDetectorPeopleDict])
+	assert.Equal(t, id.ReversedPeopleDict, facts[FactIdentityDetectorReversedPeopleDict])
+	assert.True(t, len(id.PeopleDict) >= 3)
+	assert.True(t, len(id.ReversedPeopleDict) >= 4)
+}
+
+func TestIdentityDetectorRegistration(t *testing.T) {
+	tp, exists := Registry.registered[(&IdentityDetector{}).Name()]
+	assert.True(t, exists)
+	assert.Equal(t, tp.Elem().Name(), "IdentityDetector")
+	tps, exists := Registry.provided[(&IdentityDetector{}).Provides()[0]]
+	assert.True(t, exists)
+	assert.Len(t, tps, 1)
+	assert.Equal(t, tps[0].Elem().Name(), "IdentityDetector")
+}
+
+func TestIdentityDetectorConfigureEmpty(t *testing.T) {
+	id := IdentityDetector{}
+	assert.Panics(t, func() { id.Configure(map[string]interface{}{}) })
 }
 
 func TestIdentityDetectorConsume(t *testing.T) {
@@ -66,15 +156,15 @@ func TestLoadPeopleDict(t *testing.T) {
 	assert.Contains(t, id.PeopleDict, "another@one.com")
 	assert.Contains(t, id.PeopleDict, "máximo cuadros")
 	assert.Contains(t, id.PeopleDict, "maximo@sourced.tech")
-	assert.Equal(t, len(id.ReversePeopleDict), 4)
-	assert.Equal(t, id.ReversePeopleDict[0], "Linus Torvalds")
-	assert.Equal(t, id.ReversePeopleDict[1], "Vadim Markovtsev")
-	assert.Equal(t, id.ReversePeopleDict[2], "Máximo Cuadros")
-	assert.Equal(t, id.ReversePeopleDict[3], "<unmatched>")
+	assert.Equal(t, len(id.ReversedPeopleDict), 4)
+	assert.Equal(t, id.ReversedPeopleDict[0], "Linus Torvalds")
+	assert.Equal(t, id.ReversedPeopleDict[1], "Vadim Markovtsev")
+	assert.Equal(t, id.ReversedPeopleDict[2], "Máximo Cuadros")
+	assert.Equal(t, id.ReversedPeopleDict[3], UNMATCHED_AUTHOR)
 }
 
 /*
-// internal compiler error
+// internal compiler error in 1.8
 func TestGeneratePeopleDict(t *testing.T) {
 	id := fixtureIdentityDetector()
 	commits := make([]*object.Commit, 0)
@@ -128,7 +218,7 @@ func TestGeneratePeopleDict(t *testing.T) {
 	}
 	id.GeneratePeopleDict(commits)
 	assert.True(t, len(id.PeopleDict) >= 7)
-	assert.True(t, len(id.ReversePeopleDict) >= 3)
+	assert.True(t, len(id.ReversedPeopleDict) >= 3)
 	assert.Equal(t, id.PeopleDict["vadim markovtsev"], 0)
 	assert.Equal(t, id.PeopleDict["vadim@sourced.tech"], 0)
 	assert.Equal(t, id.PeopleDict["gmarkhor@gmail.com"], 0)
@@ -136,15 +226,10 @@ func TestGeneratePeopleDict(t *testing.T) {
 	assert.Equal(t, id.PeopleDict["bzz@apache.org"], 1)
 	assert.Equal(t, id.PeopleDict["máximo cuadros"], 2)
 	assert.Equal(t, id.PeopleDict["mcuadros@gmail.com"], 2)
-	assert.Equal(t, id.ReversePeopleDict[0], "vadim markovtsev|gmarkhor@gmail.com|vadim@sourced.tech")
-	assert.Equal(t, id.ReversePeopleDict[1], "alexander bezzubov|bzz@apache.org")
-	assert.Equal(t, id.ReversePeopleDict[2], "máximo cuadros|mcuadros@gmail.com")
-}
-
-func TestIdentityDetectorFinalize(t *testing.T) {
-	id := fixtureIdentityDetector()
-	res := id.Finalize()
-	assert.Nil(t, res)
+	assert.Equal(t, id.ReversedPeopleDict[0], "vadim markovtsev|gmarkhor@gmail.com|vadim@sourced.tech")
+	assert.Equal(t, id.ReversedPeopleDict[1], "alexander bezzubov|bzz@apache.org")
+	assert.Equal(t, id.ReversedPeopleDict[2], "máximo cuadros|mcuadros@gmail.com")
+	assert.NotEqual(t, id.ReversedPeopleDict[len(id.ReversedPeopleDict)-1], UNMATCHED_AUTHOR)
 }
 
 func TestLoadPeopleDictInvalidPath(t *testing.T) {
@@ -277,6 +362,6 @@ func TestGeneratePeopleDictMailmap(t *testing.T) {
 		"Strange Guy <vadim@sourced.tech>\nVadim Markovtsev <vadim@sourced.tech> Strange Guy <vadim@sourced.tech>")
 	commits = append(commits, fake)
 	id.GeneratePeopleDict(commits)
-	assert.Contains(t, id.ReversePeopleDict,
+	assert.Contains(t, id.ReversedPeopleDict,
 		"strange guy|vadim markovtsev|gmarkhor@gmail.com|vadim@sourced.tech")
 }

+ 95 - 35
labours.py

@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 import argparse
 from datetime import datetime, timedelta
+from importlib import import_module
 import io
 import json
 import os
@@ -26,6 +27,12 @@ if sys.version_info[0] < 3:
     input = raw_input
 
 
+PB_MESSAGES = {
+    "Burndown": "pb.pb_pb2.BurndownAnalysisResults",
+    "Couples": "pb.pb_pb2.CouplesAnalysisResults",
+}
+
+
 def parse_args():
     parser = argparse.ArgumentParser()
     parser.add_argument("-o", "--output", default="",
@@ -45,9 +52,9 @@ def parse_args():
                         help="Occupy 100%% height for every measurement.")
     parser.add_argument("--couples-tmp-dir", help="Temporary directory to work with couples.")
     parser.add_argument("-m", "--mode",
-                        choices=["project", "file", "person", "churn_matrix", "ownership", "couples",
-                                 "all"],
-                        default="project", help="What to plot.")
+                        choices=["project", "file", "person", "churn_matrix", "ownership",
+                                 "couples", "all"],
+                        help="What to plot.")
     parser.add_argument(
         "--resample", default="year",
         help="The way to resample the time series. Possible values are: "
@@ -72,6 +79,9 @@ class Reader(object):
     def get_header(self):
         raise NotImplementedError
 
+    def get_burndown_parameters(self):
+        raise NotImplementedError
+
     def get_project_burndown(self):
         raise NotImplementedError
 
@@ -115,35 +125,43 @@ class YamlReader(Reader):
         self.data = data
 
     def get_name(self):
-        return next(iter(self.data["project"]))
+        return next(iter(self.data["Burndown"]["project"]))
 
     def get_header(self):
-        header = self.data["burndown"]
-        return header["begin"], header["end"], header["sampling"], header["granularity"]
+        header = self.data["hercules"]
+        return header["begin_unix_time"], header["end_unix_time"]
+
+    def get_burndown_parameters(self):
+        header = self.data["Burndown"]
+        return header["sampling"], header["granularity"]
 
     def get_project_burndown(self):
-        name, matrix = next(iter(self.data["project"].items()))
-        return name, self._parse_burndown_matrix(matrix).T
+        return self.data["hercules"]["repository"], \
+               self._parse_burndown_matrix(self.data["Burndown"]["project"]).T
 
     def get_files_burndown(self):
-        return [(p[0], self._parse_burndown_matrix(p[1]).T) for p in self.data["files"].items()]
+        return [(p[0], self._parse_burndown_matrix(p[1]).T)
+                for p in self.data["Burndown"]["files"].items()]
 
     def get_people_burndown(self):
-        return [(p[0], self._parse_burndown_matrix(p[1]).T) for p in self.data["people"].items()]
+        return [(p[0], self._parse_burndown_matrix(p[1]).T)
+                for p in self.data["Burndown"]["people"].items()]
 
     def get_ownership_burndown(self):
-        return self.data["people_sequence"], {p[0]: self._parse_burndown_matrix(p[1])
-                                              for p in self.data["people"].items()}
+        return self.data["Burndown"]["people_sequence"].copy(),\
+               {p[0]: self._parse_burndown_matrix(p[1])
+                for p in self.data["Burndown"]["people"].items()}
 
     def get_people_interaction(self):
-        return self.data["people_sequence"], self._parse_burndown_matrix(self.data["people_interaction"])
+        return self.data["Burndown"]["people_sequence"].copy(), \
+               self._parse_burndown_matrix(self.data["Burndown"]["people_interaction"])
 
     def get_files_coocc(self):
-        coocc = self.data["files_coocc"]
+        coocc = self.data["Couples"]["files_coocc"]
         return coocc["index"], self._parse_coocc_matrix(coocc["matrix"])
 
     def get_people_coocc(self):
-        coocc = self.data["people_coocc"]
+        coocc = self.data["Couples"]["people_coocc"]
         return coocc["index"], self._parse_coocc_matrix(coocc["matrix"])
 
     def _parse_burndown_matrix(self, matrix):
@@ -172,38 +190,52 @@ class ProtobufReader(Reader):
                 self.data.ParseFromString(fin.read())
         else:
             self.data.ParseFromString(sys.stdin.buffer.read())
+        self.contents = {}
+        for key, val in self.data.contents.items():
+            try:
+                mod, name = PB_MESSAGES[key].rsplit(".", 1)
+            except KeyError:
+                sys.stderr.write("Warning: there is no registered PB decoder for %s\n" % key)
+                continue
+            cls = getattr(import_module(mod), name)
+            self.contents[key] = msg = cls()
+            msg.ParseFromString(val)
 
     def get_name(self):
         return self.data.header.repository
 
     def get_header(self):
         header = self.data.header
-        return header.begin_unix_time, header.end_unix_time, \
-            header.sampling, header.granularity
+        return header.begin_unix_time, header.end_unix_time
+
+    def get_burndown_parameters(self):
+        burndown = self.contents["Burndown"]
+        return burndown.sampling, burndown.granularity
 
     def get_project_burndown(self):
-        return self._parse_burndown_matrix(self.data.burndown_project)
+        return self._parse_burndown_matrix(self.contents["Burndown"].project)
 
     def get_files_burndown(self):
-        return [self._parse_burndown_matrix(i) for i in self.data.burndown_files]
+        return [self._parse_burndown_matrix(i) for i in self.contents["Burndown"].files]
 
     def get_people_burndown(self):
-        return [self._parse_burndown_matrix(i) for i in self.data.burndown_developers]
+        return [self._parse_burndown_matrix(i) for i in self.contents["Burndown"].people]
 
     def get_ownership_burndown(self):
         people = self.get_people_burndown()
         return [p[0] for p in people], {p[0]: p[1].T for p in people}
 
     def get_people_interaction(self):
-        return [i.name for i in self.data.burndown_developers], \
-            self._parse_sparse_matrix(self.data.developers_interaction).toarray()
+        burndown = self.contents["Burndown"]
+        return [i.name for i in burndown.people], \
+            self._parse_sparse_matrix(burndown.people_interaction).toarray()
 
     def get_files_coocc(self):
-        node = self.data.file_couples
+        node = self.contents["Couples"].file_couples
         return list(node.index), self._parse_sparse_matrix(node.matrix)
 
     def get_people_coocc(self):
-        node = self.data.developer_couples
+        node = self.contents["Couples"].developer_couples
         return list(node.index), self._parse_sparse_matrix(node.matrix)
 
     def _parse_burndown_matrix(self, matrix):
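
The contents map populated above is the second parsing pass required by the new
AnalysisResults schema: the top-level message stores each analysis payload as raw
bytes keyed by the analysis name. A standalone sketch of the same decode, assuming
the generated pb.pb_pb2 module is importable and result.pb holds a -pb dump:

    from pb.pb_pb2 import AnalysisResults, BurndownAnalysisResults

    with open("result.pb", "rb") as fin:
        top = AnalysisResults()
        top.ParseFromString(fin.read())
    burndown = BurndownAnalysisResults()
    burndown.ParseFromString(top.contents["Burndown"])
    print(burndown.granularity, burndown.sampling)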
@@ -678,7 +710,10 @@ def train_embeddings(index, matrix, tmpdir, shard_size=IDEAL_SHARD_SIZE):
         print("Training Swivel model...")
         swivel.FLAGS.submatrix_rows = shard_size
         swivel.FLAGS.submatrix_cols = shard_size
-        if len(meta_index) <= IDEAL_SHARD_SIZE:
+        if len(meta_index) <= IDEAL_SHARD_SIZE / 16:
+            embedding_size = 50
+            num_epochs = 20000
+        elif len(meta_index) <= IDEAL_SHARD_SIZE:
             embedding_size = 50
             num_epochs = 10000
         elif len(meta_index) <= IDEAL_SHARD_SIZE * 2:
@@ -816,40 +851,65 @@ def main():
     header = reader.get_header()
     name = reader.get_name()
 
-    files_warning = "Files stats were not collected. Re-run hercules with -files."
-    people_warning = "People stats were not collected. Re-run hercules with -people."
+    burndown_warning = "Burndown stats were not collected. Re-run hercules with -burndown."
+    burndown_files_warning = \
+        "Burndown stats for files were not collected. Re-run hercules with " \
+        "-burndown -burndown-files."
+    burndown_people_warning = \
+        "Burndown stats for people were not collected. Re-run hercules with " \
+        "-burndown -burndown-people."
     couples_warning = "Coupling stats were not collected. Re-run hercules with -couples."
 
     def project_burndown():
+        try:
+            full_header = header + reader.get_burndown_parameters()
+        except KeyError:
+            print("project: " + burndown_warning)
+            return
         plot_burndown(args, "project",
-                      *load_burndown(header, *reader.get_project_burndown(),
+                      *load_burndown(full_header, *reader.get_project_burndown(),
                                      resample=args.resample))
 
     def files_burndown():
         try:
-            plot_many_burndown(args, "file", header, reader.get_files_burndown())
+            full_header = header + reader.get_burndown_parameters()
+        except KeyError:
+            print(burndown_warning)
+            return
+        try:
+            plot_many_burndown(args, "file", full_header, reader.get_files_burndown())
         except KeyError:
-            print(files_warning)
+            print("files: " + burndown_files_warning)
 
     def people_burndown():
         try:
-            plot_many_burndown(args, "person", header, reader.get_people_burndown())
+            full_header = header + reader.get_burndown_parameters()
         except KeyError:
-            print(people_warning)
+            print(burndown_warning)
+            return
+        try:
+            plot_many_burndown(args, "person", full_header, reader.get_people_burndown())
+        except KeyError:
+            print("people: " + burndown_people_warning)
 
     def churn_matrix():
         try:
             plot_churn_matrix(args, name, *load_churn_matrix(
                 *reader.get_people_interaction(), max_people=args.max_people))
         except KeyError:
-            print(people_warning)
+            print("churn_matrix: " + burndown_people_warning)
 
     def ownership_burndown():
         try:
+            full_header = header + reader.get_burndown_parameters()
+        except KeyError:
+            print(burndown_warning)
+            return
+        try:
             plot_ownership(args, name, *load_ownership(
-                header, *reader.get_ownership_burndown(), max_people=args.max_people))
+                full_header, *reader.get_ownership_burndown(), max_people=args.max_people))
         except KeyError:
-            print(people_warning)
+            print("ownership: " + burndown_people_warning)
 
     def couples():
         try:

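Assembled from the warning strings and the argparse options above, a typical
invocation under the renamed v3 flags would be (repository URI is hypothetical):

	hercules -burndown -burndown-people https://github.com/user/project | python3 labours.py -m ownership -o ownership.png
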
+ 211 - 95
pb/pb.pb.go

@@ -11,10 +11,14 @@ It has these top-level messages:
 	Metadata
 	BurndownSparseMatrixRow
 	BurndownSparseMatrix
+	BurndownAnalysisResults
 	CompressedSparseRowMatrix
 	Couples
 	TouchedFiles
 	DeveloperTouchedFiles
+	CouplesAnalysisResults
+	UASTChange
+	UASTChangesSaverResults
 	AnalysisResults
 */
 package pb
@@ -37,18 +41,16 @@ const _ = proto.GoGoProtoPackageIsVersion2 // please upgrade the proto package
 type Metadata struct {
 	// this format is versioned
 	Version int32 `protobuf:"varint,1,opt,name=version,proto3" json:"version,omitempty"`
-	// complete command line used to write this message
-	Cmdline string `protobuf:"bytes,2,opt,name=cmdline,proto3" json:"cmdline,omitempty"`
+	// git hash of the revision from which Hercules is built
+	Hash string `protobuf:"bytes,2,opt,name=hash,proto3" json:"hash,omitempty"`
 	// repository's name
 	Repository string `protobuf:"bytes,3,opt,name=repository,proto3" json:"repository,omitempty"`
 	// timestamp of the first analysed commit
 	BeginUnixTime int64 `protobuf:"varint,4,opt,name=begin_unix_time,json=beginUnixTime,proto3" json:"begin_unix_time,omitempty"`
 	// timestamp of the last analysed commit
 	EndUnixTime int64 `protobuf:"varint,5,opt,name=end_unix_time,json=endUnixTime,proto3" json:"end_unix_time,omitempty"`
-	// how many days are in each band [burndown_project, burndown_file, burndown_developer]
-	Granularity int32 `protobuf:"varint,6,opt,name=granularity,proto3" json:"granularity,omitempty"`
-	// how frequently we measure the state of each band [burndown_project, burndown_file, burndown_developer]
-	Sampling int32 `protobuf:"varint,7,opt,name=sampling,proto3" json:"sampling,omitempty"`
+	// number of processed commits
+	Commits int32 `protobuf:"varint,6,opt,name=commits,proto3" json:"commits,omitempty"`
 }
 
 func (m *Metadata) Reset()                    { *m = Metadata{} }
@@ -63,9 +65,9 @@ func (m *Metadata) GetVersion() int32 {
 	return 0
 }
 
-func (m *Metadata) GetCmdline() string {
+func (m *Metadata) GetHash() string {
 	if m != nil {
-		return m.Cmdline
+		return m.Hash
 	}
 	return ""
 }
@@ -91,16 +93,9 @@ func (m *Metadata) GetEndUnixTime() int64 {
 	return 0
 }
 
-func (m *Metadata) GetGranularity() int32 {
-	if m != nil {
-		return m.Granularity
-	}
-	return 0
-}
-
-func (m *Metadata) GetSampling() int32 {
+func (m *Metadata) GetCommits() int32 {
 	if m != nil {
-		return m.Sampling
+		return m.Commits
 	}
 	return 0
 }
@@ -164,6 +159,68 @@ func (m *BurndownSparseMatrix) GetRows() []*BurndownSparseMatrixRow {
 	return nil
 }
 
+type BurndownAnalysisResults struct {
+	// how many days are in each band [burndown_project, burndown_file, burndown_developer]
+	Granularity int32 `protobuf:"varint,1,opt,name=granularity,proto3" json:"granularity,omitempty"`
+	// how frequently we measure the state of each band [burndown_project, burndown_file, burndown_developer]
+	Sampling int32 `protobuf:"varint,2,opt,name=sampling,proto3" json:"sampling,omitempty"`
+	// always exists
+	Project *BurndownSparseMatrix `protobuf:"bytes,3,opt,name=project" json:"project,omitempty"`
+	// this is included if `-burndown-files` was specified
+	Files []*BurndownSparseMatrix `protobuf:"bytes,4,rep,name=files" json:"files,omitempty"`
+	// these two are included if `-burndown-people` was specified
+	People []*BurndownSparseMatrix `protobuf:"bytes,5,rep,name=people" json:"people,omitempty"`
+	// rows and cols order correspond to `burndown_developer`
+	PeopleInteraction *CompressedSparseRowMatrix `protobuf:"bytes,6,opt,name=people_interaction,json=peopleInteraction" json:"people_interaction,omitempty"`
+}
+
+func (m *BurndownAnalysisResults) Reset()                    { *m = BurndownAnalysisResults{} }
+func (m *BurndownAnalysisResults) String() string            { return proto.CompactTextString(m) }
+func (*BurndownAnalysisResults) ProtoMessage()               {}
+func (*BurndownAnalysisResults) Descriptor() ([]byte, []int) { return fileDescriptorPb, []int{3} }
+
+func (m *BurndownAnalysisResults) GetGranularity() int32 {
+	if m != nil {
+		return m.Granularity
+	}
+	return 0
+}
+
+func (m *BurndownAnalysisResults) GetSampling() int32 {
+	if m != nil {
+		return m.Sampling
+	}
+	return 0
+}
+
+func (m *BurndownAnalysisResults) GetProject() *BurndownSparseMatrix {
+	if m != nil {
+		return m.Project
+	}
+	return nil
+}
+
+func (m *BurndownAnalysisResults) GetFiles() []*BurndownSparseMatrix {
+	if m != nil {
+		return m.Files
+	}
+	return nil
+}
+
+func (m *BurndownAnalysisResults) GetPeople() []*BurndownSparseMatrix {
+	if m != nil {
+		return m.People
+	}
+	return nil
+}
+
+func (m *BurndownAnalysisResults) GetPeopleInteraction() *CompressedSparseRowMatrix {
+	if m != nil {
+		return m.PeopleInteraction
+	}
+	return nil
+}
+
 type CompressedSparseRowMatrix struct {
 	NumberOfRows    int32 `protobuf:"varint,1,opt,name=number_of_rows,json=numberOfRows,proto3" json:"number_of_rows,omitempty"`
 	NumberOfColumns int32 `protobuf:"varint,2,opt,name=number_of_columns,json=numberOfColumns,proto3" json:"number_of_columns,omitempty"`
@@ -176,7 +233,7 @@ type CompressedSparseRowMatrix struct {
 func (m *CompressedSparseRowMatrix) Reset()                    { *m = CompressedSparseRowMatrix{} }
 func (m *CompressedSparseRowMatrix) String() string            { return proto.CompactTextString(m) }
 func (*CompressedSparseRowMatrix) ProtoMessage()               {}
-func (*CompressedSparseRowMatrix) Descriptor() ([]byte, []int) { return fileDescriptorPb, []int{3} }
+func (*CompressedSparseRowMatrix) Descriptor() ([]byte, []int) { return fileDescriptorPb, []int{4} }
 
 func (m *CompressedSparseRowMatrix) GetNumberOfRows() int32 {
 	if m != nil {
@@ -223,7 +280,7 @@ type Couples struct {
 func (m *Couples) Reset()                    { *m = Couples{} }
 func (m *Couples) String() string            { return proto.CompactTextString(m) }
 func (*Couples) ProtoMessage()               {}
-func (*Couples) Descriptor() ([]byte, []int) { return fileDescriptorPb, []int{4} }
+func (*Couples) Descriptor() ([]byte, []int) { return fileDescriptorPb, []int{5} }
 
 func (m *Couples) GetIndex() []string {
 	if m != nil {
@@ -246,7 +303,7 @@ type TouchedFiles struct {
 func (m *TouchedFiles) Reset()                    { *m = TouchedFiles{} }
 func (m *TouchedFiles) String() string            { return proto.CompactTextString(m) }
 func (*TouchedFiles) ProtoMessage()               {}
-func (*TouchedFiles) Descriptor() ([]byte, []int) { return fileDescriptorPb, []int{5} }
+func (*TouchedFiles) Descriptor() ([]byte, []int) { return fileDescriptorPb, []int{6} }
 
 func (m *TouchedFiles) GetFiles() []int32 {
 	if m != nil {
@@ -263,7 +320,7 @@ type DeveloperTouchedFiles struct {
 func (m *DeveloperTouchedFiles) Reset()                    { *m = DeveloperTouchedFiles{} }
 func (m *DeveloperTouchedFiles) String() string            { return proto.CompactTextString(m) }
 func (*DeveloperTouchedFiles) ProtoMessage()               {}
-func (*DeveloperTouchedFiles) Descriptor() ([]byte, []int) { return fileDescriptorPb, []int{6} }
+func (*DeveloperTouchedFiles) Descriptor() ([]byte, []int) { return fileDescriptorPb, []int{7} }
 
 func (m *DeveloperTouchedFiles) GetDevelopers() []*TouchedFiles {
 	if m != nil {
@@ -272,79 +329,123 @@ func (m *DeveloperTouchedFiles) GetDevelopers() []*TouchedFiles {
 	return nil
 }
 
-type AnalysisResults struct {
-	// these two are always included
-	Header          *Metadata             `protobuf:"bytes,1,opt,name=header" json:"header,omitempty"`
-	BurndownProject *BurndownSparseMatrix `protobuf:"bytes,2,opt,name=burndown_project,json=burndownProject" json:"burndown_project,omitempty"`
-	// this is included if `-files` was specified
-	BurndownFiles []*BurndownSparseMatrix `protobuf:"bytes,3,rep,name=burndown_files,json=burndownFiles" json:"burndown_files,omitempty"`
-	// these two are included if `-people` was specified
-	BurndownDevelopers []*BurndownSparseMatrix `protobuf:"bytes,4,rep,name=burndown_developers,json=burndownDevelopers" json:"burndown_developers,omitempty"`
-	// rows and cols order correspond to `burndown_developer`
-	DevelopersInteraction *CompressedSparseRowMatrix `protobuf:"bytes,5,opt,name=developers_interaction,json=developersInteraction" json:"developers_interaction,omitempty"`
-	// these three are included if `-couples` was specified
+type CouplesAnalysisResults struct {
 	FileCouples      *Couples               `protobuf:"bytes,6,opt,name=file_couples,json=fileCouples" json:"file_couples,omitempty"`
 	DeveloperCouples *Couples               `protobuf:"bytes,7,opt,name=developer_couples,json=developerCouples" json:"developer_couples,omitempty"`
 	TouchedFiles     *DeveloperTouchedFiles `protobuf:"bytes,8,opt,name=touched_files,json=touchedFiles" json:"touched_files,omitempty"`
 }
 
-func (m *AnalysisResults) Reset()                    { *m = AnalysisResults{} }
-func (m *AnalysisResults) String() string            { return proto.CompactTextString(m) }
-func (*AnalysisResults) ProtoMessage()               {}
-func (*AnalysisResults) Descriptor() ([]byte, []int) { return fileDescriptorPb, []int{7} }
+func (m *CouplesAnalysisResults) Reset()                    { *m = CouplesAnalysisResults{} }
+func (m *CouplesAnalysisResults) String() string            { return proto.CompactTextString(m) }
+func (*CouplesAnalysisResults) ProtoMessage()               {}
+func (*CouplesAnalysisResults) Descriptor() ([]byte, []int) { return fileDescriptorPb, []int{8} }
 
-func (m *AnalysisResults) GetHeader() *Metadata {
+func (m *CouplesAnalysisResults) GetFileCouples() *Couples {
 	if m != nil {
-		return m.Header
+		return m.FileCouples
 	}
 	return nil
 }
 
-func (m *AnalysisResults) GetBurndownProject() *BurndownSparseMatrix {
+func (m *CouplesAnalysisResults) GetDeveloperCouples() *Couples {
 	if m != nil {
-		return m.BurndownProject
+		return m.DeveloperCouples
 	}
 	return nil
 }
 
-func (m *AnalysisResults) GetBurndownFiles() []*BurndownSparseMatrix {
+func (m *CouplesAnalysisResults) GetTouchedFiles() *DeveloperTouchedFiles {
 	if m != nil {
-		return m.BurndownFiles
+		return m.TouchedFiles
 	}
 	return nil
 }
 
-func (m *AnalysisResults) GetBurndownDevelopers() []*BurndownSparseMatrix {
+type UASTChange struct {
+	FileName   string `protobuf:"bytes,1,opt,name=file_name,json=fileName,proto3" json:"file_name,omitempty"`
+	SrcBefore  string `protobuf:"bytes,2,opt,name=src_before,json=srcBefore,proto3" json:"src_before,omitempty"`
+	SrcAfter   string `protobuf:"bytes,3,opt,name=src_after,json=srcAfter,proto3" json:"src_after,omitempty"`
+	UastBefore string `protobuf:"bytes,4,opt,name=uast_before,json=uastBefore,proto3" json:"uast_before,omitempty"`
+	UastAfter  string `protobuf:"bytes,5,opt,name=uast_after,json=uastAfter,proto3" json:"uast_after,omitempty"`
+}
+
+func (m *UASTChange) Reset()                    { *m = UASTChange{} }
+func (m *UASTChange) String() string            { return proto.CompactTextString(m) }
+func (*UASTChange) ProtoMessage()               {}
+func (*UASTChange) Descriptor() ([]byte, []int) { return fileDescriptorPb, []int{9} }
+
+func (m *UASTChange) GetFileName() string {
 	if m != nil {
-		return m.BurndownDevelopers
+		return m.FileName
 	}
-	return nil
+	return ""
 }
 
-func (m *AnalysisResults) GetDevelopersInteraction() *CompressedSparseRowMatrix {
+func (m *UASTChange) GetSrcBefore() string {
 	if m != nil {
-		return m.DevelopersInteraction
+		return m.SrcBefore
 	}
-	return nil
+	return ""
 }
 
-func (m *AnalysisResults) GetFileCouples() *Couples {
+func (m *UASTChange) GetSrcAfter() string {
 	if m != nil {
-		return m.FileCouples
+		return m.SrcAfter
+	}
+	return ""
+}
+
+func (m *UASTChange) GetUastBefore() string {
+	if m != nil {
+		return m.UastBefore
+	}
+	return ""
+}
+
+func (m *UASTChange) GetUastAfter() string {
+	if m != nil {
+		return m.UastAfter
+	}
+	return ""
+}
+
+type UASTChangesSaverResults struct {
+	Changes []*UASTChange `protobuf:"bytes,1,rep,name=changes" json:"changes,omitempty"`
+}
+
+func (m *UASTChangesSaverResults) Reset()                    { *m = UASTChangesSaverResults{} }
+func (m *UASTChangesSaverResults) String() string            { return proto.CompactTextString(m) }
+func (*UASTChangesSaverResults) ProtoMessage()               {}
+func (*UASTChangesSaverResults) Descriptor() ([]byte, []int) { return fileDescriptorPb, []int{10} }
+
+func (m *UASTChangesSaverResults) GetChanges() []*UASTChange {
+	if m != nil {
+		return m.Changes
 	}
 	return nil
 }
 
-func (m *AnalysisResults) GetDeveloperCouples() *Couples {
+type AnalysisResults struct {
+	Header *Metadata `protobuf:"bytes,1,opt,name=header" json:"header,omitempty"`
+	// the mapped values are dynamic messages which require a second parsing pass.
+	Contents map[string][]byte `protobuf:"bytes,2,rep,name=contents" json:"contents,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"`
+}
+
+func (m *AnalysisResults) Reset()                    { *m = AnalysisResults{} }
+func (m *AnalysisResults) String() string            { return proto.CompactTextString(m) }
+func (*AnalysisResults) ProtoMessage()               {}
+func (*AnalysisResults) Descriptor() ([]byte, []int) { return fileDescriptorPb, []int{11} }
+
+func (m *AnalysisResults) GetHeader() *Metadata {
 	if m != nil {
-		return m.DeveloperCouples
+		return m.Header
 	}
 	return nil
 }
 
-func (m *AnalysisResults) GetTouchedFiles() *DeveloperTouchedFiles {
+func (m *AnalysisResults) GetContents() map[string][]byte {
 	if m != nil {
-		return m.TouchedFiles
+		return m.Contents
 	}
 	return nil
 }
@@ -353,55 +454,70 @@ func init() {
 	proto.RegisterType((*Metadata)(nil), "Metadata")
 	proto.RegisterType((*BurndownSparseMatrixRow)(nil), "BurndownSparseMatrixRow")
 	proto.RegisterType((*BurndownSparseMatrix)(nil), "BurndownSparseMatrix")
+	proto.RegisterType((*BurndownAnalysisResults)(nil), "BurndownAnalysisResults")
 	proto.RegisterType((*CompressedSparseRowMatrix)(nil), "CompressedSparseRowMatrix")
 	proto.RegisterType((*Couples)(nil), "Couples")
 	proto.RegisterType((*TouchedFiles)(nil), "TouchedFiles")
 	proto.RegisterType((*DeveloperTouchedFiles)(nil), "DeveloperTouchedFiles")
+	proto.RegisterType((*CouplesAnalysisResults)(nil), "CouplesAnalysisResults")
+	proto.RegisterType((*UASTChange)(nil), "UASTChange")
+	proto.RegisterType((*UASTChangesSaverResults)(nil), "UASTChangesSaverResults")
 	proto.RegisterType((*AnalysisResults)(nil), "AnalysisResults")
 }
 
 func init() { proto.RegisterFile("pb/pb.proto", fileDescriptorPb) }
 
 var fileDescriptorPb = []byte{
-	// 634 bytes of a gzipped FileDescriptorProto
-	0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x54, 0x4f, 0x6f, 0xd3, 0x4e,
-	0x10, 0x95, 0x6b, 0x3b, 0x7f, 0xc6, 0x49, 0xd3, 0xee, 0xaf, 0xed, 0xcf, 0xf4, 0x80, 0x8c, 0x55,
-	0xa1, 0x88, 0x3f, 0x46, 0x4a, 0xc5, 0x09, 0x0e, 0x40, 0x51, 0x25, 0x0e, 0x15, 0xb0, 0x2d, 0x67,
-	0xcb, 0x89, 0xb7, 0xed, 0x22, 0x7b, 0xd7, 0xda, 0x5d, 0x37, 0xe9, 0x57, 0xe2, 0xca, 0x97, 0xe2,
-	0xc8, 0x47, 0x40, 0x5e, 0x7b, 0x1d, 0x03, 0x29, 0x70, 0xf3, 0x9b, 0x79, 0x6f, 0x3c, 0x6f, 0x66,
-	0x6c, 0xf0, 0x8a, 0xf9, 0xb3, 0x62, 0x1e, 0x15, 0x82, 0x2b, 0x1e, 0x7e, 0xb3, 0x60, 0x70, 0x46,
-	0x54, 0x92, 0x26, 0x2a, 0x41, 0x3e, 0xf4, 0x6f, 0x88, 0x90, 0x94, 0x33, 0xdf, 0x0a, 0xac, 0xa9,
-	0x8b, 0x0d, 0xac, 0x32, 0x8b, 0x3c, 0xcd, 0x28, 0x23, 0xfe, 0x56, 0x60, 0x4d, 0x87, 0xd8, 0x40,
-	0x74, 0x1f, 0x40, 0x90, 0x82, 0x4b, 0xaa, 0xb8, 0xb8, 0xf5, 0x6d, 0x9d, 0xec, 0x44, 0xd0, 0x43,
-	0x98, 0xcc, 0xc9, 0x15, 0x65, 0x71, 0xc9, 0xe8, 0x2a, 0x56, 0x34, 0x27, 0xbe, 0x13, 0x58, 0x53,
-	0x1b, 0x8f, 0x75, 0xf8, 0x13, 0xa3, 0xab, 0x0b, 0x9a, 0x13, 0x14, 0xc2, 0x98, 0xb0, 0xb4, 0xc3,
-	(remaining bytes of the previous gzipped FileDescriptorProto elided)
+	// 805 bytes of a gzipped FileDescriptorProto
+	(generated descriptor bytes elided)
 }

+ 38 - 20
pb/pb.proto

@@ -3,18 +3,16 @@ syntax = "proto3";
 message Metadata {
     // this format is versioned
     int32 version = 1;
-    // complete command line used to write this message
-    string cmdline = 2;
+    // git hash of the revision from which Hercules is built
+    string hash = 2;
     // repository's name
     string repository = 3;
     // timestamp of the first analysed commit
     int64 begin_unix_time = 4;
     // timestamp of the last analysed commit
     int64 end_unix_time = 5;
-    // how many days are in each band [burndown_project, burndown_file, burndown_developer]
-    int32 granularity = 6;
-    // how frequently we measure the state of each band [burndown_project, burndown_file, burndown_developer]
-    int32 sampling = 7;
+    // number of processed commits
+    int32 commits = 6;
 }
 
 message BurndownSparseMatrixRow {
@@ -31,6 +29,21 @@ message BurndownSparseMatrix {
     repeated BurndownSparseMatrixRow rows = 4;
 }
 
+message BurndownAnalysisResults {
+    // how many days are in each band [burndown_project, burndown_file, burndown_developer]
+    int32 granularity = 1;
+    // how frequently we measure the state of each band [burndown_project, burndown_file, burndown_developer]
+    int32 sampling = 2;
+    // always exists
+    BurndownSparseMatrix project = 3;
+    // this is included if `-burndown-files` was specified
+    repeated BurndownSparseMatrix files = 4;
+    // these two are included if `-burndown-people` was specified
+    repeated BurndownSparseMatrix people = 5;
+    // rows and cols order corresponds to the `people` order
+    CompressedSparseRowMatrix people_interaction = 6;
+}
+
 message CompressedSparseRowMatrix {
     int32 number_of_rows = 1;
     int32 number_of_columns = 2;
@@ -56,21 +69,26 @@ message DeveloperTouchedFiles {
     repeated TouchedFiles developers = 1;
 }
 
-message AnalysisResults {
-    // these two are always included
-    Metadata header = 1;
-    BurndownSparseMatrix burndown_project = 2;
-
-    // this is included if `-files` was specified
-    repeated BurndownSparseMatrix burndown_files = 3;
-
-    // these two are included if `-people` was specified
-    repeated BurndownSparseMatrix burndown_developers = 4;
-    // rows and cols order correspond to `burndown_developer`
-    CompressedSparseRowMatrix developers_interaction = 5;
-
-    // these three are included if `-couples` was specified
+message CouplesAnalysisResults {
     Couples file_couples = 6;
     Couples developer_couples = 7;
     DeveloperTouchedFiles touched_files = 8;
 }
+
+message UASTChange {
+    string file_name = 1;
+    string src_before = 2;
+    string src_after = 3;
+    string uast_before = 4;
+    string uast_after = 5;
+}
+
+message UASTChangesSaverResults {
+    repeated UASTChange changes = 1;
+}
+
+message AnalysisResults {
+    Metadata header = 1;
+    // the mapped values are dynamic messages which require a second parsing pass.
+    map<string, bytes> contents = 2;
+}
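
Side note: this two-level layout means a reader first decodes AnalysisResults and then decodes each payload with the type implied by its key. A minimal Go sketch, assuming the generated bindings in pb/ and the gogo proto runtime which uast.go already imports; the "Burndown" key and the result.pb file name are illustrative:

package main

import (
	"fmt"
	"io/ioutil"

	"github.com/gogo/protobuf/proto"
	"gopkg.in/src-d/hercules.v3/pb"
)

func main() {
	raw, err := ioutil.ReadFile("result.pb") // hypothetical dump produced with -pb
	if err != nil {
		panic(err)
	}
	message := &pb.AnalysisResults{}
	if err := proto.Unmarshal(raw, message); err != nil {
		panic(err)
	}
	// Second pass: every value in `contents` is itself a serialized message.
	if payload, exists := message.Contents["Burndown"]; exists {
		burndown := &pb.BurndownAnalysisResults{}
		if err := proto.Unmarshal(payload, burndown); err != nil {
			panic(err)
		}
		fmt.Println(burndown.Granularity, burndown.Sampling)
	}
}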

The diff is not shown because the file is too large
+ 292 - 58
pb/pb_pb2.py


+ 13 - 3
pb/utils.go

@@ -1,5 +1,7 @@
 package pb
 
+import "sort"
+
 func ToBurndownSparseMatrix(matrix [][]int64, name string) *BurndownSparseMatrix {
   r := BurndownSparseMatrix{
 	  Name: name,
@@ -65,9 +67,17 @@ func MapToCompressedSparseRowMatrix(matrix []map[int]int64) *CompressedSparseRow
 	}
 	r.Indptr[0] = 0
 	for _, row := range matrix {
-		for x, col := range row {
-			r.Data = append(r.Data, col)
-			r.Indices = append(r.Indices, int32(x))
+		order := make([]int, len(row))
+		i := 0
+		for col := range row {
+			order[i] = col
+			i++
+		}
+		sort.Ints(order)
+		for _, col := range order {
+			val := row[col]
+			r.Data = append(r.Data, val)
+			r.Indices = append(r.Indices, int32(col))
 		}
 		r.Indptr = append(r.Indptr, r.Indptr[len(r.Indptr) - 1] + int64(len(row)))
 	}
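
The sorting above makes the CSR serialization deterministic: previously the column order inside each row depended on Go's randomized map iteration. A tiny sketch of the resulting arrays (input values assumed):

package main

import (
	"fmt"

	"gopkg.in/src-d/hercules.v3/pb"
)

func main() {
	// Two rows, {0: 1, 2: 3} and {1: 5}, now always serialize as
	// Data = [1 3 5], Indices = [0 2 1], Indptr = [0 2 3].
	m := pb.MapToCompressedSparseRowMatrix([]map[int]int64{
		{0: 1, 2: 3},
		{1: 5},
	})
	fmt.Println(m.Data, m.Indices, m.Indptr)
}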

+ 416 - 28
pipeline.go

@@ -1,18 +1,56 @@
 package hercules
 
 import (
+	"bufio"
 	"errors"
+	"flag"
 	"fmt"
 	"io"
+	"io/ioutil"
 	"os"
+	"path/filepath"
+	"reflect"
+	"sort"
+	"strings"
+	"unsafe"
 
-	"bufio"
 	"gopkg.in/src-d/go-git.v4"
 	"gopkg.in/src-d/go-git.v4/plumbing"
 	"gopkg.in/src-d/go-git.v4/plumbing/object"
-	"gopkg.in/src-d/hercules.v2/toposort"
+	"gopkg.in/src-d/hercules.v3/toposort"
+)
+
+type ConfigurationOptionType int
+
+const (
+	// Boolean value type.
+	BoolConfigurationOption ConfigurationOptionType = iota
+	// Integer value type.
+	IntConfigurationOption
+	// String value type.
+	StringConfigurationOption
+)
+
+const (
+	ConfigPipelineDumpPath = "Pipeline.DumpPath"
+	ConfigPipelineDryRun = "Pipeline.DryRun"
 )
 
+// ConfigurationOption provides a unified, retrospective way to set up PipelineItem-s.
+type ConfigurationOption struct {
+	// Name identifies the configuration option in facts.
+	Name string
+	// Description represents the help text about the configuration option.
+	Description string
+	// Flag corresponds to the CLI token with "-" prepended.
+	Flag string
+	// Type specifies the kind of the configuration option's value.
+	Type ConfigurationOptionType
+	// Default is the initial value of the configuration option.
+	Default interface{}
+}
+
+// PipelineItem is the interface for all the units of the Git commit analysis pipeline.
 type PipelineItem interface {
 	// Name returns the name of the analysis.
 	Name() string
@@ -21,6 +59,11 @@ type PipelineItem interface {
 	Provides() []string
 	// Requires returns the list of keys of needed entities which must be supplied in Consume().
 	Requires() []string
+	// ListConfigurationOptions returns the list of available options which can be consumed by Configure().
+	ListConfigurationOptions() []ConfigurationOption
+	// Configure performs the initial setup of the object by applying parameters from facts.
+	// It allows PipelineItems to be created in a uniform way.
+	Configure(facts map[string]interface{})
 	// Initialize prepares and resets the item. Consume() requires Initialize()
 	// to be called at least once beforehand.
 	Initialize(*git.Repository)
@@ -29,8 +72,161 @@ type PipelineItem interface {
 	// "commit" and "index".
 	// Returns the calculated entities which match Provides().
 	Consume(deps map[string]interface{}) (map[string]interface{}, error)
+}
+
+// FeaturedPipelineItem enables switching the automatic insertion of pipeline items on or off.
+type FeaturedPipelineItem interface {
+	PipelineItem
+	// Features returns the list of names which enable this item to be automatically inserted
+	// in Pipeline.DeployItem().
+	Features() []string
+}
+
+// LeafPipelineItem corresponds to the top level pipeline items which produce the end results.
+type LeafPipelineItem interface {
+	PipelineItem
+	// Flag returns the cmdline name of the item.
+	Flag() string
 	// Finalize returns the result of the analysis.
 	Finalize() interface{}
+	// Serialize encodes the object returned by Finalize() to Text or Protocol Buffers.
+	Serialize(result interface{}, binary bool, writer io.Writer) error
+}
+
+// PipelineItemRegistry contains all the known PipelineItem-s.
+type PipelineItemRegistry struct {
+	provided   map[string][]reflect.Type
+	registered map[string]reflect.Type
+	flags      map[string]reflect.Type
+}
+
+// Register adds another PipelineItem to the registry.
+func (registry *PipelineItemRegistry) Register(example PipelineItem) {
+	t := reflect.TypeOf(example)
+	registry.registered[example.Name()] = t
+	if fpi, ok := interface{}(example).(LeafPipelineItem); ok {
+		registry.flags[fpi.Flag()] = t
+	}
+	for _, dep := range example.Provides() {
+		ts := registry.provided[dep]
+		if ts == nil {
+			ts = []reflect.Type{}
+		}
+		ts = append(ts, t)
+		registry.provided[dep] = ts
+	}
+}
+
+// Summon returns the PipelineItems which provide the given entity or which
+// match the given name.
+func (registry *PipelineItemRegistry) Summon(providesOrName string) []PipelineItem {
+	if registry.provided == nil {
+		return []PipelineItem{}
+	}
+	ts := registry.provided[providesOrName]
+	items := []PipelineItem{}
+	for _, t := range ts {
+		items = append(items, reflect.New(t.Elem()).Interface().(PipelineItem))
+	}
+	if t, exists := registry.registered[providesOrName]; exists {
+		items = append(items, reflect.New(t.Elem()).Interface().(PipelineItem))
+	}
+	return items
+}
+
+type arrayFeatureFlags struct {
+	// Flags contains the features activated through the command line.
+	Flags []string
+	// Choices contains all registered features.
+	Choices map[string]bool
+}
+
+func (acf *arrayFeatureFlags) String() string {
+	return strings.Join([]string(acf.Flags), ", ")
+}
+
+func (acf *arrayFeatureFlags) Set(value string) error {
+	if _, exists := acf.Choices[value]; !exists {
+		return errors.New(fmt.Sprintf("Feature \"%s\" is not registered.", value))
+	}
+	acf.Flags = append(acf.Flags, value)
+	return nil
+}
+
+var featureFlags = arrayFeatureFlags{Flags: []string{}, Choices: map[string]bool{}}
+
+// AddFlags inserts the cmdline options from PipelineItem.ListConfigurationOptions(),
+// FeaturedPipelineItem().Features() and LeafPipelineItem.Flag() into the global "flag" parser
+// built into the Go runtime.
+// Returns the "facts" which can be fed into PipelineItem.Configure() and the dictionary of
+// runnable analyses (LeafPipelineItem) choices. E.g. if "BurndownAnalysis" was activated
+// through "-burndown" cmdline argument, this mapping would contain ["BurndownAnalysis"] = *true.
+func (registry *PipelineItemRegistry) AddFlags() (map[string]interface{}, map[string]*bool) {
+	flags := map[string]interface{}{}
+	deployed := map[string]*bool{}
+	for name, it := range registry.registered {
+		formatHelp := func(desc string) string {
+			return fmt.Sprintf("%s [%s]", desc, name)
+		}
+		itemIface := reflect.New(it.Elem()).Interface()
+		for _, opt := range itemIface.(PipelineItem).ListConfigurationOptions() {
+			var iface interface{}
+			// The unsafe arithmetic below overwrites the interface's data word with the
+			// pointer returned by flag.Bool/Int/String, so reading flags[opt.Name] after
+			// flag.Parse() yields the parsed value while the interface keeps its plain
+			// static type.
+			switch opt.Type {
+			case BoolConfigurationOption:
+				iface = interface{}(true)
+				ptr := (**bool)(unsafe.Pointer(uintptr(unsafe.Pointer(&iface)) + unsafe.Sizeof(&iface)))
+				*ptr = flag.Bool(opt.Flag, opt.Default.(bool), formatHelp(opt.Description))
+			case IntConfigurationOption:
+				iface = interface{}(0)
+				ptr := (**int)(unsafe.Pointer(uintptr(unsafe.Pointer(&iface)) + unsafe.Sizeof(&iface)))
+				*ptr = flag.Int(opt.Flag, opt.Default.(int), formatHelp(opt.Description))
+			case StringConfigurationOption:
+				iface = interface{}("")
+				ptr := (**string)(unsafe.Pointer(uintptr(unsafe.Pointer(&iface)) + unsafe.Sizeof(&iface)))
+				*ptr = flag.String(opt.Flag, opt.Default.(string), formatHelp(opt.Description))
+			}
+			flags[opt.Name] = iface
+		}
+		if fpi, ok := itemIface.(FeaturedPipelineItem); ok {
+			for _, f := range fpi.Features() {
+				featureFlags.Choices[f] = true
+			}
+		}
+		if fpi, ok := itemIface.(LeafPipelineItem); ok {
+			deployed[fpi.Name()] = flag.Bool(
+				fpi.Flag(), false, fmt.Sprintf("Runs %s analysis.", fpi.Name()))
+		}
+	}
+	{
+		// Pipeline flags
+		iface := interface{}("")
+		ptr1 := (**string)(unsafe.Pointer(uintptr(unsafe.Pointer(&iface)) + unsafe.Sizeof(&iface)))
+		*ptr1 = flag.String("dump-dag", "", "Write the pipeline DAG to a Graphviz file.")
+		flags[ConfigPipelineDumpPath] = iface
+		iface = interface{}(true)
+		ptr2 := (**bool)(unsafe.Pointer(uintptr(unsafe.Pointer(&iface)) + unsafe.Sizeof(&iface)))
+		*ptr2 = flag.Bool("dry-run", false, "Do not run any analyses - only resolve the DAG. " +
+				"Useful for -dump-dag.")
+		flags[ConfigPipelineDryRun] = iface
+	}
+	features := []string{}
+	for f := range featureFlags.Choices {
+		features = append(features, f)
+	}
+	flag.Var(&featureFlags, "feature",
+		fmt.Sprintf("Enables specific analysis features, can be specified "+
+			"multiple times. Available features: [%s].", strings.Join(features, ", ")))
+	return flags, deployed
+}
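
For context, a condensed sketch of the driver loop expected to sit on top of AddFlags(); it mirrors what cmd/hercules does, but the repository path and the overall main() are assumptions:

package main

import (
	"flag"

	"gopkg.in/src-d/go-git.v4"
	"gopkg.in/src-d/hercules.v3"
)

func main() {
	facts, deployed := hercules.Registry.AddFlags()
	flag.Parse()
	repository, err := git.PlainOpen(".") // assumption: analyse the repository in cwd
	if err != nil {
		panic(err)
	}
	pipeline := hercules.NewPipeline(repository)
	pipeline.SetFeaturesFromFlags()
	// Deploy every leaf analysis whose cmdline flag was set; its Requires() chain
	// is resolved automatically through Registry.Summon() inside DeployItem().
	for name, enabled := range deployed {
		if *enabled {
			pipeline.DeployItem(hercules.Registry.Summon(name)[0])
		}
	}
	pipeline.Initialize(facts)
	if _, err := pipeline.Run(pipeline.Commits()); err != nil {
		panic(err)
	}
}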
+
+// Registry contains all known pipeline item types.
+var Registry = &PipelineItemRegistry{
+	provided:   map[string][]reflect.Type{},
+	registered: map[string]reflect.Type{},
+	flags:      map[string]reflect.Type{},
+}
+
+type wrappedPipelineItem struct {
+	Item     PipelineItem
+	Children []wrappedPipelineItem
 }
 
 type Pipeline struct {
@@ -39,27 +235,95 @@ type Pipeline struct {
 	// second is the total number of commits.
 	OnProgress func(int, int)
 
-	// repository points to the analysed Git repository struct from go-git.
+	// Repository points to the analysed Git repository struct from go-git.
 	repository *git.Repository
 
-	// items are the registered analysers in the pipeline.
+	// items are the registered building blocks in the pipeline. The order defines the
+	// execution sequence.
 	items []PipelineItem
 
-	// plan is the resolved execution sequence.
-	plan []PipelineItem
+	// The collection of parameters to create items.
+	facts map[string]interface{}
+
+	// Feature flags which enable the corresponding items.
+	features map[string]bool
 }
 
+// FactPipelineCommits is the name of the fact which holds the list of commits to process.
+const FactPipelineCommits = "commits"
+
 func NewPipeline(repository *git.Repository) *Pipeline {
-	return &Pipeline{repository: repository, items: []PipelineItem{}, plan: []PipelineItem{}}
+	return &Pipeline{
+		repository: repository,
+		items:      []PipelineItem{},
+		facts:      map[string]interface{}{},
+		features:   map[string]bool{},
+	}
 }
 
-func (pipeline *Pipeline) AddItem(item PipelineItem) {
-	for _, reg := range pipeline.items {
-		if reg == item {
-			return
+func (pipeline *Pipeline) GetFact(name string) interface{} {
+	return pipeline.facts[name]
+}
+
+func (pipeline *Pipeline) SetFact(name string, value interface{}) {
+	pipeline.facts[name] = value
+}
+
+func (pipeline *Pipeline) GetFeature(name string) (bool, bool) {
+	val, exists := pipeline.features[name]
+	return val, exists
+}
+
+func (pipeline *Pipeline) SetFeature(name string) {
+	pipeline.features[name] = true
+}
+
+func (pipeline *Pipeline) SetFeaturesFromFlags() {
+	for _, feature := range featureFlags.Flags {
+		pipeline.SetFeature(feature)
+	}
+}
+
+func (pipeline *Pipeline) DeployItem(item PipelineItem) PipelineItem {
+	queue := []PipelineItem{}
+	queue = append(queue, item)
+	added := map[string]PipelineItem{}
+	for _, item := range pipeline.items {
+		added[item.Name()] = item
+	}
+	added[item.Name()] = item
+	pipeline.AddItem(item)
+	for len(queue) > 0 {
+		head := queue[0]
+		queue = queue[1:]
+		for _, dep := range head.Requires() {
+			for _, sibling := range Registry.Summon(dep) {
+				if _, exists := added[sibling.Name()]; !exists {
+					disabled := false
+					// If this item supports features, check them against those activated in pipeline.features.
+					if fpi, matches := interface{}(sibling).(FeaturedPipelineItem); matches {
+						for _, feature := range fpi.Features() {
+							if !pipeline.features[feature] {
+								disabled = true
+								break
+							}
+						}
+					}
+					if disabled {
+						continue
+					}
+					added[sibling.Name()] = sibling
+					queue = append(queue, sibling)
+					pipeline.AddItem(sibling)
+				}
+			}
 		}
 	}
+	return item
+}
+
+func (pipeline *Pipeline) AddItem(item PipelineItem) PipelineItem {
 	pipeline.items = append(pipeline.items, item)
+	return item
 }
 
 func (pipeline *Pipeline) RemoveItem(item PipelineItem) {
@@ -71,6 +335,10 @@ func (pipeline *Pipeline) RemoveItem(item PipelineItem) {
 	}
 }
 
+func (pipeline *Pipeline) Len() int {
+	return len(pipeline.items)
+}
+
 // Commits returns the critical path in the repository's history. It starts
 // from HEAD and traces commits backwards till the root. When it encounters
 // a merge (more than one parent), it always chooses the first parent.
@@ -99,40 +367,158 @@ func (pipeline *Pipeline) Commits() []*object.Commit {
 	return result
 }
 
-func (pipeline *Pipeline) Initialize() {
+type sortablePipelineItems []PipelineItem
+
+func (items sortablePipelineItems) Len() int {
+	return len(items)
+}
+
+func (items sortablePipelineItems) Less(i, j int) bool {
+	return items[i].Name() < items[j].Name()
+}
+
+func (items sortablePipelineItems) Swap(i, j int) {
+	items[i], items[j] = items[j], items[i]
+}
+
+func (pipeline *Pipeline) resolve(dumpPath string) {
 	graph := toposort.NewGraph()
+	sort.Sort(sortablePipelineItems(pipeline.items))
 	name2item := map[string]PipelineItem{}
-	for index, item := range pipeline.items {
-		name := fmt.Sprintf("%s_%d", item.Name(), index)
+	ambiguousMap := map[string][]string{}
+	nameUsages := map[string]int{}
+	for _, item := range pipeline.items {
+		nameUsages[item.Name()]++
+	}
+	counters := map[string]int{}
+	for _, item := range pipeline.items {
+		name := item.Name()
+		if nameUsages[name] > 1 {
+			index := counters[item.Name()] + 1
+			counters[item.Name()] = index
+			name = fmt.Sprintf("%s_%d", item.Name(), index)
+		}
 		graph.AddNode(name)
 		name2item[name] = item
 		for _, key := range item.Provides() {
-			key += "_entity"
+			key = "[" + key + "]"
 			graph.AddNode(key)
-			graph.AddEdge(name, key)
+			if graph.AddEdge(name, key) > 1 {
+				if ambiguousMap[key] != nil {
+					fmt.Fprintln(os.Stderr, "Pipeline:")
+					for _, item2 := range pipeline.items {
+						if item2 == item {
+							fmt.Fprint(os.Stderr, "> ")
+						}
+						fmt.Fprint(os.Stderr, item2.Name(), " [")
+						for i, key2 := range item2.Provides() {
+							fmt.Fprint(os.Stderr, key2)
+							if i < len(item2.Provides()) - 1 {
+								fmt.Fprint(os.Stderr, ", ")
+							}
+						}
+						fmt.Fprintln(os.Stderr, "]")
+					}
+					panic("Failed to resolve pipeline dependencies: ambiguous graph.")
+				}
+				ambiguousMap[key] = graph.FindParents(key)
+			}
 		}
 	}
-	for index, item := range pipeline.items {
-		name := fmt.Sprintf("%s_%d", item.Name(), index)
+	counters = map[string]int{}
+	for _, item := range pipeline.items {
+		name := item.Name()
+		if nameUsages[name] > 1 {
+			index := counters[item.Name()] + 1
+			counters[item.Name()] = index
+			name = fmt.Sprintf("%s_%d", item.Name(), index)
+		}
 		for _, key := range item.Requires() {
-			key += "_entity"
-			if !graph.AddEdge(key, name) {
+			key = "[" + key + "]"
+			if graph.AddEdge(key, name) == 0 {
 				panic(fmt.Sprintf("Unsatisfied dependency: %s -> %s", key, item.Name()))
 			}
 		}
 	}
+	if len(ambiguousMap) > 0 {
+		ambiguous := []string{}
+		for key := range ambiguousMap {
+			ambiguous = append(ambiguous, key)
+		}
+		sort.Strings(ambiguous)
+		bfsorder := graph.BreadthSort()
+		bfsindex := map[string]int{}
+		for i, s := range bfsorder {
+			bfsindex[s] = i
+		}
+		for len(ambiguous) > 0 {
+			key := ambiguous[0]
+			ambiguous = ambiguous[1:]
+			pair := ambiguousMap[key]
+			inheritor := pair[1]
+			if bfsindex[pair[1]] < bfsindex[pair[0]] {
+				inheritor = pair[0]
+			}
+			removed := graph.RemoveEdge(key, inheritor)
+			cycle := map[string]bool{}
+			for _, node := range graph.FindCycle(key) {
+				cycle[node] = true
+			}
+			if len(cycle) == 0 {
+				cycle[inheritor] = true
+			}
+			if removed {
+				graph.AddEdge(key, inheritor)
+			}
+			graph.RemoveEdge(inheritor, key)
+			graph.ReindexNode(inheritor)
+			// relink every child of key that is not in the cycle to inheritor
+			for _, node := range graph.FindChildren(key) {
+				if _, exists := cycle[node]; !exists {
+					graph.AddEdge(inheritor, node)
+					graph.RemoveEdge(key, node)
+				}
+			}
+			graph.ReindexNode(key)
+		}
+	}
+	var graphCopy *toposort.Graph
+	if dumpPath != "" {
+		graphCopy = graph.Copy()
+	}
 	strplan, ok := graph.Toposort()
 	if !ok {
-		panic("Failed to resolve pipeline dependencies.")
+		panic("Failed to resolve pipeline dependencies: unable to topologically sort the items.")
 	}
+	pipeline.items = make([]PipelineItem, 0, len(pipeline.items))
 	for _, key := range strplan {
-		item, ok := name2item[key]
-		if ok {
-			pipeline.plan = append(pipeline.plan, item)
+		if item, ok := name2item[key]; ok {
+			pipeline.items = append(pipeline.items, item)
 		}
 	}
-	if len(pipeline.plan) != len(pipeline.items) {
-		panic("Internal pipeline dependency resolution error.")
+	if dumpPath != "" {
+		// If the resolved order is not reproducible between runs, uncomment this to debug:
+		// fmt.Fprint(os.Stderr, graphCopy.DebugDump())
+		ioutil.WriteFile(dumpPath, []byte(graphCopy.Serialize(strplan)), 0666)
+		absPath, _ := filepath.Abs(dumpPath)
+		fmt.Fprintf(os.Stderr, "Wrote the DAG to %s\n", absPath)
+	}
+}
+
+func (pipeline *Pipeline) Initialize(facts map[string]interface{}) {
+	if facts == nil {
+		facts = map[string]interface{}{}
+	}
+	if _, exists := facts[FactPipelineCommits]; !exists {
+		facts[FactPipelineCommits] = pipeline.Commits()
+	}
+	dumpPath, _ := facts[ConfigPipelineDumpPath].(string)
+	pipeline.resolve(dumpPath)
+	if dryRun, _ := facts[ConfigPipelineDryRun].(bool); dryRun {
+		return
+	}
+	for _, item := range pipeline.items {
+		item.Configure(facts)
 	}
 	for _, item := range pipeline.items {
 		item.Initialize(pipeline.repository)
@@ -152,7 +538,7 @@ func (pipeline *Pipeline) Run(commits []*object.Commit) (map[PipelineItem]interf
 	for index, commit := range commits {
 		onProgress(index, len(commits))
 		state := map[string]interface{}{"commit": commit, "index": index}
-		for _, item := range pipeline.plan {
+		for _, item := range pipeline.items {
 			update, err := item.Consume(state)
 			if err != nil {
 				fmt.Fprintf(os.Stderr, "%s failed on commit #%d %s\n",
@@ -171,7 +557,9 @@ func (pipeline *Pipeline) Run(commits []*object.Commit) (map[PipelineItem]interf
 	onProgress(len(commits), len(commits))
 	result := map[PipelineItem]interface{}{}
 	for _, item := range pipeline.items {
-		result[item] = item.Finalize()
+		if fpi, ok := interface{}(item).(LeafPipelineItem); ok {
+			result[item] = fpi.Finalize()
+		}
 	}
 	return result, nil
 }
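
Since Run() now returns entries only for leaf items, draining the result map pairs naturally with LeafPipelineItem.Serialize(). Continuing the driver sketch from the AddFlags() note above ("os" must also be imported):

results, err := pipeline.Run(pipeline.Commits())
if err != nil {
	panic(err)
}
for item, result := range results {
	// Only LeafPipelineItem-s appear as keys after this change.
	leaf := item.(hercules.LeafPipelineItem)
	if err := leaf.Serialize(result, false, os.Stdout); err != nil { // false: text, not protobuf
		panic(err)
	}
}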

+ 203 - 13
pipeline_test.go

@@ -4,7 +4,10 @@ import (
 	"errors"
 	"io"
 	"io/ioutil"
+	"flag"
 	"os"
+	"path"
+	"reflect"
 	"testing"
 
 	"github.com/stretchr/testify/assert"
@@ -35,6 +38,29 @@ func (item *testPipelineItem) Requires() []string {
 	return []string{}
 }
 
+func (item *testPipelineItem) Configure(facts map[string]interface{}) {
+}
+
+func (item *testPipelineItem) ListConfigurationOptions() []ConfigurationOption {
+	options := [...]ConfigurationOption{{
+		Name:        "TestOption",
+		Description: "The option description.",
+		Flag:        "test-option",
+		Type:        IntConfigurationOption,
+		Default:     10,
+	}}
+	return options[:]
+}
+
+func (item *testPipelineItem) Flag() string {
+	return "mytest"
+}
+
+func (item *testPipelineItem) Features() []string {
+	f := [...]string{"power"}
+	return f[:]
+}
+
 func (item *testPipelineItem) Initialize(repository *git.Repository) {
 	item.Initialized = repository != nil
 }
@@ -61,6 +87,43 @@ func (item *testPipelineItem) Finalize() interface{} {
 	return item
 }
 
+func (item *testPipelineItem) Serialize(result interface{}, binary bool, writer io.Writer) error {
+	return nil
+}
+
+func getRegistry() *PipelineItemRegistry {
+	return &PipelineItemRegistry{
+		provided:   map[string][]reflect.Type{},
+		registered: map[string]reflect.Type{},
+		flags:      map[string]reflect.Type{},
+	}
+}
+
+func TestPipelineItemRegistrySummon(t *testing.T) {
+	reg := getRegistry()
+	reg.Register(&testPipelineItem{})
+	summoned := reg.Summon((&testPipelineItem{}).Provides()[0])
+	assert.Len(t, summoned, 1)
+	assert.Equal(t, summoned[0].Name(), (&testPipelineItem{}).Name())
+	summoned = reg.Summon((&testPipelineItem{}).Name())
+	assert.Len(t, summoned, 1)
+	assert.Equal(t, summoned[0].Name(), (&testPipelineItem{}).Name())
+}
+
+func TestPipelineItemRegistryAddFlags(t *testing.T) {
+	reg := getRegistry()
+	reg.Register(&testPipelineItem{})
+	flag.CommandLine = flag.NewFlagSet(os.Args[0], flag.ExitOnError)
+	facts, deployed := reg.AddFlags()
+	assert.Len(t, facts, 3)
+	assert.IsType(t, 0, facts[(&testPipelineItem{}).ListConfigurationOptions()[0].Name])
+	assert.Contains(t, facts, ConfigPipelineDryRun)
+	assert.Contains(t, facts, ConfigPipelineDumpPath)
+	assert.Len(t, deployed, 1)
+	assert.Contains(t, deployed, (&testPipelineItem{}).Name())
+	assert.NotNil(t, flag.Lookup((&testPipelineItem{}).Flag()))
+}
+
 type dependingTestPipelineItem struct {
 	DependencySatisfied  bool
 	TestNilConsumeReturn bool
@@ -80,9 +143,27 @@ func (item *dependingTestPipelineItem) Requires() []string {
 	return arr[:]
 }
 
+func (item *dependingTestPipelineItem) ListConfigurationOptions() []ConfigurationOption {
+	options := [...]ConfigurationOption{{
+		Name:        "TestOption2",
+		Description: "The option description.",
+		Flag:        "test-option2",
+		Type:        IntConfigurationOption,
+		Default:     10,
+	}}
+	return options[:]
+}
+
+func (item *dependingTestPipelineItem) Configure(facts map[string]interface{}) {
+}
+
 func (item *dependingTestPipelineItem) Initialize(repository *git.Repository) {
 }
 
+func (item *dependingTestPipelineItem) Flag() string {
+	return "depflag"
+}
+
 func (item *dependingTestPipelineItem) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
 	_, exists := deps["test"]
 	item.DependencySatisfied = exists
@@ -94,14 +175,40 @@ func (item *dependingTestPipelineItem) Consume(deps map[string]interface{}) (map
 }
 
 func (item *dependingTestPipelineItem) Finalize() interface{} {
-	return item.DependencySatisfied
+	return true
+}
+
+func (item *dependingTestPipelineItem) Serialize(result interface{}, binary bool, writer io.Writer) error {
+	return nil
+}
+
+func TestPipelineFacts(t *testing.T) {
+	pipeline := NewPipeline(testRepository)
+	pipeline.SetFact("fact", "value")
+	assert.Equal(t, pipeline.GetFact("fact"), "value")
+}
+
+func TestPipelineFeatures(t *testing.T) {
+	pipeline := NewPipeline(testRepository)
+	pipeline.SetFeature("feat")
+	val, _ := pipeline.GetFeature("feat")
+	assert.True(t, val)
+	val, exists := pipeline.GetFeature("!")
+	assert.False(t, exists)
+	featureFlags.Set("777")
+	defer func() {
+		featureFlags = arrayFeatureFlags{Flags: []string{}, Choices: map[string]bool{}}
+	}()
+	pipeline.SetFeaturesFromFlags()
+	_, exists = pipeline.GetFeature("777")
+	assert.False(t, exists)
 }
 
 func TestPipelineRun(t *testing.T) {
 	pipeline := NewPipeline(testRepository)
 	item := &testPipelineItem{}
 	pipeline.AddItem(item)
-	pipeline.Initialize()
+	pipeline.Initialize(map[string]interface{}{})
 	assert.True(t, item.Initialized)
 	commits := make([]*object.Commit, 1)
 	commits[0], _ = testRepository.CommitObject(plumbing.NewHash(
@@ -193,13 +300,15 @@ func TestPipelineDeps(t *testing.T) {
 	item2 := &testPipelineItem{}
 	pipeline.AddItem(item1)
 	pipeline.AddItem(item2)
-	pipeline.Initialize()
+	assert.Equal(t, pipeline.Len(), 2)
+	pipeline.Initialize(map[string]interface{}{})
 	commits := make([]*object.Commit, 1)
 	commits[0], _ = testRepository.CommitObject(plumbing.NewHash(
 		"af9ddc0db70f09f3f27b4b98e415592a7485171c"))
 	result, err := pipeline.Run(commits)
 	assert.Nil(t, err)
 	assert.True(t, result[item1].(bool))
+	assert.Equal(t, result[item2], item2)
 	item1.TestNilConsumeReturn = true
 	assert.Panics(t, func() { pipeline.Run(commits) })
 }
@@ -209,7 +318,7 @@ func TestPipelineError(t *testing.T) {
 	item := &testPipelineItem{}
 	item.TestError = true
 	pipeline.AddItem(item)
-	pipeline.Initialize()
+	pipeline.Initialize(map[string]interface{}{})
 	commits := make([]*object.Commit, 1)
 	commits[0], _ = testRepository.CommitObject(plumbing.NewHash(
 		"af9ddc0db70f09f3f27b4b98e415592a7485171c"))
@@ -218,20 +327,94 @@ func TestPipelineError(t *testing.T) {
 	assert.NotNil(t, err)
 }
 
+func TestPipelineSerialize(t *testing.T) {
+	pipeline := NewPipeline(testRepository)
+	pipeline.SetFeature("uast")
+	pipeline.DeployItem(&BurndownAnalysis{})
+	facts := map[string]interface{}{}
+	facts["Pipeline.DryRun"] = true
+	tmpdir, _ := ioutil.TempDir("", "hercules-")
+	defer os.RemoveAll(tmpdir)
+	dotpath := path.Join(tmpdir, "graph.dot")
+	facts["Pipeline.DumpPath"] = dotpath
+	pipeline.Initialize(facts)
+	bdot, _ := ioutil.ReadFile(dotpath)
+	dot := string(bdot)
+	assert.Equal(t, `digraph Hercules {
+  "6 BlobCache" -> "7 [blob_cache]"
+  "0 DaysSinceStart" -> "3 [day]"
+  "9 FileDiff" -> "11 [file_diff]"
+  "15 FileDiffRefiner" -> "16 Burndown"
+  "1 IdentityDetector" -> "4 [author]"
+  "8 RenameAnalysis" -> "16 Burndown"
+  "8 RenameAnalysis" -> "9 FileDiff"
+  "8 RenameAnalysis" -> "10 UAST"
+  "8 RenameAnalysis" -> "13 UASTChanges"
+  "2 TreeDiff" -> "5 [changes]"
+  "10 UAST" -> "12 [uasts]"
+  "13 UASTChanges" -> "14 [changed_uasts]"
+  "4 [author]" -> "16 Burndown"
+  "7 [blob_cache]" -> "16 Burndown"
+  "7 [blob_cache]" -> "9 FileDiff"
+  "7 [blob_cache]" -> "8 RenameAnalysis"
+  "7 [blob_cache]" -> "10 UAST"
+  "14 [changed_uasts]" -> "15 FileDiffRefiner"
+  "5 [changes]" -> "6 BlobCache"
+  "5 [changes]" -> "8 RenameAnalysis"
+  "3 [day]" -> "16 Burndown"
+  "11 [file_diff]" -> "15 FileDiffRefiner"
+  "12 [uasts]" -> "13 UASTChanges"
+}`, dot)
+}
+
+func TestPipelineSerializeNoUast(t *testing.T) {
+	pipeline := NewPipeline(testRepository)
+	// pipeline.SetFeature("uast")
+	pipeline.DeployItem(&BurndownAnalysis{})
+	facts := map[string]interface{}{}
+	facts["Pipeline.DryRun"] = true
+	tmpdir, _ := ioutil.TempDir("", "hercules-")
+	defer os.RemoveAll(tmpdir)
+	dotpath := path.Join(tmpdir, "graph.dot")
+	facts["Pipeline.DumpPath"] = dotpath
+	pipeline.Initialize(facts)
+	bdot, _ := ioutil.ReadFile(dotpath)
+	dot := string(bdot)
+	assert.Equal(t, `digraph Hercules {
+  "6 BlobCache" -> "7 [blob_cache]"
+  "0 DaysSinceStart" -> "3 [day]"
+  "9 FileDiff" -> "10 [file_diff]"
+  "1 IdentityDetector" -> "4 [author]"
+  "8 RenameAnalysis" -> "11 Burndown"
+  "8 RenameAnalysis" -> "9 FileDiff"
+  "2 TreeDiff" -> "5 [changes]"
+  "4 [author]" -> "11 Burndown"
+  "7 [blob_cache]" -> "11 Burndown"
+  "7 [blob_cache]" -> "9 FileDiff"
+  "7 [blob_cache]" -> "8 RenameAnalysis"
+  "5 [changes]" -> "6 BlobCache"
+  "5 [changes]" -> "8 RenameAnalysis"
+  "3 [day]" -> "11 Burndown"
+  "10 [file_diff]" -> "11 Burndown"
+}`, dot)
+}
+
 func init() {
 	cwd, err := os.Getwd()
 	if err == nil {
 		testRepository, err = git.PlainOpen(cwd)
 		if err == nil {
-			iter, _ := testRepository.CommitObjects()
-			commits := -1
-			for ; err != io.EOF; _, err = iter.Next() {
-				if err != nil {
-					panic(err)
-				}
-				commits++
-				if commits >= 100 {
-					return
+			iter, err := testRepository.CommitObjects()
+			if err == nil {
+				commits := -1
+				for ; err != io.EOF; _, err = iter.Next() {
+					if err != nil {
+						panic(err)
+					}
+					commits++
+					if commits >= 100 {
+						return
+					}
 				}
 			}
 		}
@@ -240,3 +423,10 @@ func init() {
 		URL: "https://github.com/src-d/hercules",
 	})
 }
+
+func TestPipelineResolveIntegration(t *testing.T) {
+	pipeline := NewPipeline(testRepository)
+	pipeline.DeployItem(&BurndownAnalysis{})
+	pipeline.DeployItem(&CouplesAnalysis{})
+	pipeline.Initialize(nil)
+}

+ 34 - 7
renames.go

@@ -1,6 +1,8 @@
 package hercules
 
 import (
+	"fmt"
+	"os"
 	"sort"
 	"unicode/utf8"
 
@@ -20,12 +22,18 @@ type RenameAnalysis struct {
 	repository *git.Repository
 }
 
+const (
+	RENAME_ANALYSIS_DEFAULT_THRESHOLD = 90
+
+	ConfigRenameAnalysisSimilarityThreshold = "RenameAnalysis.SimilarityThreshold"
+)
+
 func (ra *RenameAnalysis) Name() string {
 	return "RenameAnalysis"
 }
 
 func (ra *RenameAnalysis) Provides() []string {
-	arr := [...]string{"renamed_changes"}
+	arr := [...]string{"changes"}
 	return arr[:]
 }
 
@@ -34,9 +42,28 @@ func (ra *RenameAnalysis) Requires() []string {
 	return arr[:]
 }
 
+func (ra *RenameAnalysis) ListConfigurationOptions() []ConfigurationOption {
+	options := [...]ConfigurationOption{{
+		Name:        ConfigRenameAnalysisSimilarityThreshold,
+		Description: "The threshold on the similarity index used to detect renames.",
+		Flag:        "M",
+		Type:        IntConfigurationOption,
+		Default:     RENAME_ANALYSIS_DEFAULT_THRESHOLD},
+	}
+	return options[:]
+}
+
+func (ra *RenameAnalysis) Configure(facts map[string]interface{}) {
+	if val, exists := facts[ConfigRenameAnalysisSimilarityThreshold].(int); exists {
+		ra.SimilarityThreshold = val
+	}
+}
+
 func (ra *RenameAnalysis) Initialize(repository *git.Repository) {
 	if ra.SimilarityThreshold < 0 || ra.SimilarityThreshold > 100 {
-		panic("hercules.RenameAnalysis: an invalid SimilarityThreshold was specified")
+		fmt.Fprintf(os.Stderr, "Warning: adjusted the similarity threshold to %d\n",
+			RENAME_ANALYSIS_DEFAULT_THRESHOLD)
+		ra.SimilarityThreshold = RENAME_ANALYSIS_DEFAULT_THRESHOLD
 	}
 	ra.repository = repository
 }
@@ -149,11 +176,7 @@ func (ra *RenameAnalysis) Consume(deps map[string]interface{}) (map[string]inter
 	for _, blob := range deleted_blobs {
 		reduced_changes = append(reduced_changes, blob.change)
 	}
-	return map[string]interface{}{"renamed_changes": reduced_changes}, nil
-}
-
-func (ra *RenameAnalysis) Finalize() interface{} {
-	return nil
+	return map[string]interface{}{"changes": reduced_changes}, nil
 }
 
 func (ra *RenameAnalysis) sizesAreClose(size1 int64, size2 int64) bool {
@@ -233,3 +256,7 @@ func (slice sortableBlobs) Less(i, j int) bool {
 func (slice sortableBlobs) Swap(i, j int) {
 	slice[i], slice[j] = slice[j], slice[i]
 }
+
+func init() {
+	Registry.Register(&RenameAnalysis{})
+}
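
A short sketch of the new configuration path, assuming a pipeline built as in the driver sketch earlier; Pipeline.Initialize() calls Configure() on every deployed item before Initialize():

ra := pipeline.DeployItem(&hercules.RenameAnalysis{}).(*hercules.RenameAnalysis)
pipeline.Initialize(map[string]interface{}{
	hercules.ConfigRenameAnalysisSimilarityThreshold: 80,
})
// ra.SimilarityThreshold is now 80; values outside [0, 100] are reset to the
// default of 90 with a warning during Initialize().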

+ 32 - 11
renames_test.go

@@ -17,29 +17,50 @@ func TestRenameAnalysisMeta(t *testing.T) {
 	ra := fixtureRenameAnalysis()
 	assert.Equal(t, ra.Name(), "RenameAnalysis")
 	assert.Equal(t, len(ra.Provides()), 1)
-	assert.Equal(t, ra.Provides()[0], "renamed_changes")
+	assert.Equal(t, ra.Provides()[0], "changes")
 	assert.Equal(t, len(ra.Requires()), 2)
 	assert.Equal(t, ra.Requires()[0], "blob_cache")
 	assert.Equal(t, ra.Requires()[1], "changes")
+	opts := ra.ListConfigurationOptions()
+	assert.Len(t, opts, 1)
+	assert.Equal(t, opts[0].Name, ConfigRenameAnalysisSimilarityThreshold)
+	ra.SimilarityThreshold = 0
+	facts := map[string]interface{}{}
+	facts[ConfigRenameAnalysisSimilarityThreshold] = 70
+	ra.Configure(facts)
+	assert.Equal(t, ra.SimilarityThreshold, 70)
+	delete(facts, ConfigRenameAnalysisSimilarityThreshold)
+	ra.Configure(facts)
+	assert.Equal(t, ra.SimilarityThreshold, 70)
+}
+
+func TestRenameAnalysisRegistration(t *testing.T) {
+	tp, exists := Registry.registered[(&RenameAnalysis{}).Name()]
+	assert.True(t, exists)
+	assert.Equal(t, tp.Elem().Name(), "RenameAnalysis")
+	tps, exists := Registry.provided[(&RenameAnalysis{}).Provides()[0]]
+	assert.True(t, exists)
+	assert.True(t, len(tps) >= 1)
+	matched := false
+	for _, tp := range tps {
+		matched = matched || tp.Elem().Name() == "RenameAnalysis"
+	}
+	assert.True(t, matched)
 }
 
 func TestRenameAnalysisInitializeInvalidThreshold(t *testing.T) {
 	ra := RenameAnalysis{SimilarityThreshold: -10}
-	assert.Panics(t, func() { ra.Initialize(testRepository) })
+	ra.Initialize(testRepository)
+	assert.Equal(t, ra.SimilarityThreshold, RENAME_ANALYSIS_DEFAULT_THRESHOLD)
 	ra = RenameAnalysis{SimilarityThreshold: 110}
-	assert.Panics(t, func() { ra.Initialize(testRepository) })
+	ra.Initialize(testRepository)
+	assert.Equal(t, ra.SimilarityThreshold, RENAME_ANALYSIS_DEFAULT_THRESHOLD)
 	ra = RenameAnalysis{SimilarityThreshold: 0}
 	ra.Initialize(testRepository)
 	ra = RenameAnalysis{SimilarityThreshold: 100}
 	ra.Initialize(testRepository)
 }
 
-func TestRenameAnalysisFinalize(t *testing.T) {
-	ra := fixtureRenameAnalysis()
-	r := ra.Finalize()
-	assert.Nil(t, r)
-}
-
 func TestRenameAnalysisConsume(t *testing.T) {
 	ra := fixtureRenameAnalysis()
 	changes := make(object.Changes, 3)
@@ -101,12 +122,12 @@ func TestRenameAnalysisConsume(t *testing.T) {
 	ra.SimilarityThreshold = 33
 	res, err := ra.Consume(deps)
 	assert.Nil(t, err)
-	renamed := res["renamed_changes"].(object.Changes)
+	renamed := res["changes"].(object.Changes)
 	assert.Equal(t, len(renamed), 2)
 	ra.SimilarityThreshold = 35
 	res, err = ra.Consume(deps)
 	assert.Nil(t, err)
-	renamed = res["renamed_changes"].(object.Changes)
+	renamed = res["changes"].(object.Changes)
 	assert.Equal(t, len(renamed), 3)
 }
 

+ 7 - 101
stdout/utils.go

@@ -2,11 +2,9 @@ package stdout
 
 import (
 	"fmt"
-	"sort"
+	"io"
 	"strconv"
 	"strings"
-
-	"gopkg.in/src-d/hercules.v2"
 )
 
 func SafeString(str string) string {
@@ -15,7 +13,7 @@ func SafeString(str string) string {
 	return "\"" + str + "\""
 }
 
-func PrintMatrix(matrix [][]int64, name string, fixNegative bool) {
+func PrintMatrix(writer io.Writer, matrix [][]int64, indent int, name string, fixNegative bool) {
 	// determine the maximum length of each value
 	var maxnum int64 = -(1 << 32)
 	var minnum int64 = 1 << 32
@@ -37,15 +35,14 @@ func PrintMatrix(matrix [][]int64, name string, fixNegative bool) {
 		}
 	}
 	last := len(matrix[len(matrix)-1])
-	indent := 2
 	if name != "" {
-		fmt.Printf("  %s: |-\n", SafeString(name))
+		fmt.Fprintf(writer, "%s%s: |-\n", strings.Repeat(" ", indent), SafeString(name))
 		indent += 2
 	}
 	// print the resulting triangular matrix
 	first := true
 	for _, status := range matrix {
-		fmt.Print(strings.Repeat(" ", indent-1))
+		fmt.Fprint(writer, strings.Repeat(" ", indent-1))
 		for i := 0; i < last; i++ {
 			var val int64
 			if i < len(status) {
@@ -57,103 +54,12 @@ func PrintMatrix(matrix [][]int64, name string, fixNegative bool) {
 				}
 			}
 			if !first {
-				fmt.Printf(" %[1]*[2]d", width, val)
+				fmt.Fprintf(writer, " %[1]*[2]d", width, val)
 			} else {
 				first = false
-				fmt.Printf("%d%s", val, strings.Repeat(" ", width-len(strconv.FormatInt(val, 10))))
-			}
-		}
-		fmt.Println()
-	}
-}
-
-func PrintCouples(result *hercules.CouplesResult, peopleDict []string) {
-	fmt.Println("files_coocc:")
-	fmt.Println("  index:")
-	for _, file := range result.Files {
-		fmt.Printf("    - %s\n", SafeString(file))
-	}
-
-	fmt.Println("  matrix:")
-	for _, files := range result.FilesMatrix {
-		fmt.Print("    - {")
-		indices := []int{}
-		for file := range files {
-			indices = append(indices, file)
-		}
-		sort.Ints(indices)
-		for i, file := range indices {
-			fmt.Printf("%d: %d", file, files[file])
-			if i < len(indices)-1 {
-				fmt.Print(", ")
-			}
-		}
-		fmt.Println("}")
-	}
-
-	fmt.Println("people_coocc:")
-	fmt.Println("  index:")
-	for _, person := range peopleDict {
-		fmt.Printf("    - %s\n", SafeString(person))
-	}
-
-	fmt.Println("  matrix:")
-	for _, people := range result.PeopleMatrix {
-		fmt.Print("    - {")
-		indices := []int{}
-		for file := range people {
-			indices = append(indices, file)
-		}
-		sort.Ints(indices)
-		for i, person := range indices {
-			fmt.Printf("%d: %d", person, people[person])
-			if i < len(indices)-1 {
-				fmt.Print(", ")
+				fmt.Fprintf(writer, " %d%s", val, strings.Repeat(" ", width-len(strconv.FormatInt(val, 10))))
 			}
 		}
-		fmt.Println("}")
+		fmt.Fprintln(writer)
 	}
-
-	fmt.Println("  author_files:") // sorted by number of files each author changed
-	peopleFiles := sortByNumberOfFiles(result.PeopleFiles, peopleDict, result.Files)
-	for _, authorFiles := range peopleFiles {
-		fmt.Printf("    - %s:\n", SafeString(authorFiles.Author))
-		sort.Strings(authorFiles.Files)
-		for _, file := range authorFiles.Files {
-			fmt.Printf("      - %s\n", SafeString(file)) // sorted by path
-		}
-	}
-}
-
-func sortByNumberOfFiles(
-	peopleFiles [][]int, peopleDict []string, filesDict []string) authorFilesList {
-	var pfl authorFilesList
-	for peopleIdx, files := range peopleFiles {
-		if peopleIdx < len(peopleDict) {
-			fileNames := make([]string, len(files))
-			for i, fi := range files {
-				fileNames[i] = filesDict[fi]
-			}
-			pfl = append(pfl, authorFiles{peopleDict[peopleIdx], fileNames})
-		}
-	}
-	sort.Sort(pfl)
-	return pfl
-}
-
-type authorFiles struct {
-	Author string
-	Files  []string
-}
-
-type authorFilesList []authorFiles
-
-func (s authorFilesList) Len() int {
-	return len(s)
-}
-func (s authorFilesList) Swap(i, j int) {
-	s[i], s[j] = s[j], s[i]
-}
-func (s authorFilesList) Less(i, j int) bool {
-	return len(s[i].Files) < len(s[j].Files)
 }
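
With the writer now injectable, PrintMatrix is reusable outside of the os.Stdout path. A quick usage sketch (matrix values assumed):

package main

import (
	"os"

	"gopkg.in/src-d/hercules.v3/stdout"
)

func main() {
	matrix := [][]int64{
		{10, 0},
		{5, 7},
	}
	// Writes the matrix as an indented YAML block literal under the "project" key.
	stdout.PrintMatrix(os.Stdout, matrix, 2, "project", false)
}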

+ 200 - 13
toposort/toposort.go

@@ -1,25 +1,48 @@
 package toposort
 
-// Copied from https://github.com/philopon/go-toposort
+import (
+	"bytes"
+	"fmt"
+	"sort"
+	"strings"
+)
+
+// Reworked from https://github.com/philopon/go-toposort
 
 type Graph struct {
-	nodes   []string
+	// Outgoing connections for every node.
 	outputs map[string]map[string]int
+	// How many parents each node has.
 	inputs  map[string]int
 }
 
+// NewGraph initializes a new Graph.
 func NewGraph() *Graph {
 	return &Graph{
-		nodes:   []string{},
 		inputs:  map[string]int{},
 		outputs: map[string]map[string]int{},
 	}
 }
 
-func (g *Graph) AddNode(name string) bool {
-	g.nodes = append(g.nodes, name)
+// Copy clones the graph and returns the independent copy.
+func (g *Graph) Copy() *Graph {
+	clone := NewGraph()
+	for k, v := range g.inputs {
+		clone.inputs[k] = v
+	}
+	for k1, v1 := range g.outputs {
+		m := map[string]int{}
+		clone.outputs[k1] = m
+		for k2, v2 := range v1 {
+			m[k2] = v2
+		}
+	}
+	return clone
+}
 
-	if _, ok := g.outputs[name]; ok {
+// AddNode inserts a new node into the graph.
+func (g *Graph) AddNode(name string) bool {
+	if _, exists := g.outputs[name]; exists {
 		return false
 	}
 	g.outputs[name] = make(map[string]int)
@@ -27,6 +50,7 @@ func (g *Graph) AddNode(name string) bool {
 	return true
 }
 
+// AddNodes inserts multiple nodes into the graph at once.
 func (g *Graph) AddNodes(names ...string) bool {
 	for _, name := range names {
 		if ok := g.AddNode(name); !ok {
@@ -36,16 +60,34 @@ func (g *Graph) AddNodes(names ...string) bool {
 	return true
 }
 
-func (g *Graph) AddEdge(from, to string) bool {
+// AddEdge inserts the link from "from" node to "to" node. It returns the resulting
+// number of incoming edges of "to", or 0 if the "from" node does not exist.
+func (g *Graph) AddEdge(from, to string) int {
 	m, ok := g.outputs[from]
 	if !ok {
-		return false
+		return 0
 	}
 
 	m[to] = len(m) + 1
-	g.inputs[to]++
+	ni := g.inputs[to] + 1
+	g.inputs[to] = ni
 
-	return true
+	return ni
+}
+
+// ReindexNode updates the internal representation of the node after edge removals.
+func (g *Graph) ReindexNode(node string) {
+	children, ok := g.outputs[node]
+	if !ok {
+		return
+	}
+	keys := []string{}
+	for key := range children {
+		keys = append(keys, key)
+	}
+	sort.Strings(keys)
+	for i, key := range keys {
+		children[key] = i + 1
+	}
 }
 
 func (g *Graph) unsafeRemoveEdge(from, to string) {
@@ -53,6 +95,8 @@ func (g *Graph) unsafeRemoveEdge(from, to string) {
 	g.inputs[to]--
 }
 
+// RemoveEdge deletes the link from "from" node to "to" node.
+// Call ReindexNode(from) after you finish modifying the edges.
 func (g *Graph) RemoveEdge(from, to string) bool {
 	if _, ok := g.outputs[from]; !ok {
 		return false
@@ -61,15 +105,17 @@ func (g *Graph) RemoveEdge(from, to string) bool {
 	return true
 }
 
+// Toposort sorts the nodes in the graph in topological order.
 func (g *Graph) Toposort() ([]string, bool) {
-	L := make([]string, 0, len(g.nodes))
-	S := make([]string, 0, len(g.nodes))
+	L := make([]string, 0, len(g.outputs))
+	S := make([]string, 0, len(g.outputs))
 
-	for _, n := range g.nodes {
+	for n := range g.outputs {
 		if g.inputs[n] == 0 {
 			S = append(S, n)
 		}
 	}
+	sort.Strings(S)
 
 	for len(S) > 0 {
 		var n string
@@ -101,3 +147,144 @@ func (g *Graph) Toposort() ([]string, bool) {
 
 	return L, true
 }
+
+// BreadthSort sorts the nodes in the graph in BFS order.
+func (g *Graph) BreadthSort() []string {
+	L := make([]string, 0, len(g.outputs))
+	S := make([]string, 0, len(g.outputs))
+
+	for n := range g.outputs {
+		if g.inputs[n] == 0 {
+			S = append(S, n)
+		}
+	}
+
+	visited := map[string]bool{}
+	for len(S) > 0 {
+		node := S[0]
+		S = S[1:]
+		if _, exists := visited[node]; !exists {
+			L = append(L, node)
+			visited[node] = true
+			for child := range g.outputs[node] {
+				S = append(S, child)
+			}
+		}
+	}
+
+	return L
+}
+
+// FindCycle returns a cycle in the graph which contains the "seed" node, or an
+// empty slice if no such cycle exists.
+func (g *Graph) FindCycle(seed string) []string {
+	type edge struct {
+		node   string
+		parent string
+	}
+	S := make([]edge, 0, len(g.outputs))
+	S = append(S, edge{seed, ""})
+	visited := map[string]string{}
+	for len(S) > 0 {
+		e := S[0]
+		S = S[1:]
+		if parent, exists := visited[e.node]; !exists || parent == "" {
+			visited[e.node] = e.parent
+			for child := range g.outputs[e.node] {
+				S = append(S, edge{child, e.node})
+			}
+		}
+		if e.node == seed && e.parent != "" {
+			result := []string{}
+			node := e.parent
+			for node != seed {
+				result = append(result, node)
+				node = visited[node]
+			}
+			result = append(result, seed)
+			// reverse
+			for left, right := 0, len(result)-1; left < right; left, right = left+1, right-1 {
+				result[left], result[right] = result[right], result[left]
+			}
+			return result
+		}
+	}
+	return []string{}
+}
+
+// FindParents returns the other ends of incoming edges.
+func (g *Graph) FindParents(to string) []string {
+	result := []string{}
+	for node, children := range g.outputs {
+		if _, exists := children[to]; exists {
+			result = append(result, node)
+		}
+	}
+	return result
+}
+
+// FindChildren returns the other ends of outgoing edges.
+func (g *Graph) FindChildren(from string) []string {
+	result := []string{}
+	for child := range g.outputs[from] {
+		result = append(result, child)
+	}
+	sort.Strings(result)
+	return result
+}
+
+// Serialize outputs the graph in Graphviz format.
+func (g *Graph) Serialize(sorted []string) string {
+	node2index := map[string]int{}
+	for index, node := range sorted {
+		node2index[node] = index
+	}
+	var buffer bytes.Buffer
+	buffer.WriteString("digraph Hercules {\n")
+	nodesFrom := []string{}
+	for nodeFrom := range g.outputs {
+		nodesFrom = append(nodesFrom, nodeFrom)
+	}
+	sort.Strings(nodesFrom)
+	for _, nodeFrom := range nodesFrom {
+		links := []string{}
+		for nodeTo := range g.outputs[nodeFrom] {
+			links = append(links, nodeTo)
+		}
+		sort.Strings(links)
+		for _, nodeTo := range links {
+			buffer.WriteString(fmt.Sprintf("  \"%d %s\" -> \"%d %s\"\n",
+				node2index[nodeFrom], nodeFrom, node2index[nodeTo], nodeTo))
+		}
+	}
+	buffer.WriteString("}")
+	return buffer.String()
+}
+
+// DebugDump returns a human-readable representation of the graph's internal state.
+func (g *Graph) DebugDump() string {
+	S := make([]string, 0, len(g.outputs))
+	for n := range g.outputs {
+		if g.inputs[n] == 0 {
+			S = append(S, n)
+		}
+	}
+	sort.Strings(S)
+	var buffer bytes.Buffer
+	buffer.WriteString(strings.Join(S, " ") + "\n")
+	keys := []string{}
+	vals := map[string][]string{}
+	for key, val1 := range g.outputs {
+		val2 := make([]string, len(val1))
+		for name, idx := range val1 {
+			val2[idx - 1] = name
+		}
+		keys = append(keys, key)
+		vals[key] = val2
+	}
+	sort.Strings(keys)
+	for _, key := range keys {
+		buffer.WriteString(fmt.Sprintf("%s %d = ", key, g.inputs[key]))
+		outs := vals[key]
+		buffer.WriteString(strings.Join(outs, " ") + "\n")
+	}
+	return buffer.String()
+}
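
To make the reworked toposort API concrete, a minimal usage sketch:

package main

import (
	"fmt"

	"gopkg.in/src-d/hercules.v3/toposort"
)

func main() {
	g := toposort.NewGraph()
	g.AddNodes("a", "b", "c")
	g.AddEdge("a", "b") // AddEdge now returns the parent count of "b" instead of a bool
	g.AddEdge("b", "c")
	if order, ok := g.Toposort(); ok {
		fmt.Println(order) // [a b c]
	}
}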

+ 150 - 5
toposort/toposort_test.go

@@ -1,6 +1,9 @@
 package toposort
 
-import "testing"
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
 
 func index(s []string, v string) int {
 	for i, s := range s {
@@ -16,7 +19,7 @@ type Edge struct {
 	To   string
 }
 
-func TestDuplicatedNode(t *testing.T) {
+func TestToposortDuplicatedNode(t *testing.T) {
 	graph := NewGraph()
 	graph.AddNode("a")
 	if graph.AddNode("a") {
@@ -25,14 +28,14 @@ func TestDuplicatedNode(t *testing.T) {
 
 }
 
-func TestRemoveNotExistEdge(t *testing.T) {
+func TestToposortRemoveNotExistEdge(t *testing.T) {
 	graph := NewGraph()
 	if graph.RemoveEdge("a", "b") {
 		t.Error("not raising not exist edge error")
 	}
 }
 
-func TestWikipedia(t *testing.T) {
+func TestToposortWikipedia(t *testing.T) {
 	graph := NewGraph()
 	graph.AddNodes("2", "3", "5", "7", "8", "9", "10", "11")
 
@@ -68,7 +71,7 @@ func TestWikipedia(t *testing.T) {
 	}
 }
 
-func TestCycle(t *testing.T) {
+func TestToposortCycle(t *testing.T) {
 	graph := NewGraph()
 	graph.AddNodes("1", "2", "3")
 
@@ -81,3 +84,145 @@ func TestCycle(t *testing.T) {
 		t.Error("closed path not detected in closed pathed graph")
 	}
 }
+
+func TestToposortCopy(t *testing.T) {
+	graph := NewGraph()
+	graph.AddNodes("1", "2", "3")
+
+	graph.AddEdge("1", "2")
+	graph.AddEdge("2", "3")
+	graph.AddEdge("3", "1")
+
+	gc := graph.Copy()
+	assert.Equal(t, graph.inputs, gc.inputs)
+	assert.Equal(t, graph.outputs, gc.outputs)
+	delete(graph.outputs, "1")
+	assert.NotEqual(t, graph.outputs, gc.outputs)
+}
+
+func TestToposortReindexNode(t *testing.T) {
+	graph := NewGraph()
+	graph.AddNodes("1", "2", "3")
+
+	graph.AddEdge("1", "2")
+	graph.AddEdge("2", "3")
+	graph.AddEdge("3", "1")
+	graph.AddEdge("1", "3")
+	graph.RemoveEdge("1", "2")
+	assert.Len(t, graph.outputs["1"], 1)
+	assert.Equal(t, graph.outputs["1"]["3"], 2)
+	assert.Equal(t, graph.inputs["2"], 0)
+	graph.ReindexNode("1")
+	assert.Equal(t, graph.outputs["1"]["3"], 1)
+}
+
+func TestToposortBreadthSort(t *testing.T) {
+	graph := NewGraph()
+	graph.AddNodes("0", "1", "2", "3", "4")
+
+	graph.AddEdge("0", "1")
+	graph.AddEdge("1", "2")
+	graph.AddEdge("2", "3")
+	graph.AddEdge("1", "3")
+	graph.AddEdge("3", "4")
+	graph.AddEdge("4", "1")
+	order := graph.BreadthSort()
+	var expected [5]string
+	if order[2] == "2" {
+		expected = [...]string{"0", "1", "2", "3", "4"}
+	} else {
+		expected = [...]string{"0", "1", "3", "2", "4"}
+	}
+	assert.Equal(t, expected[:], order)
+}
+
+func TestToposortFindCycle(t *testing.T) {
+	graph := NewGraph()
+	graph.AddNodes("1", "2", "3", "4", "5")
+
+	graph.AddEdge("1", "2")
+	graph.AddEdge("2", "3")
+	graph.AddEdge("2", "4")
+	graph.AddEdge("3", "1")
+	graph.AddEdge("5", "1")
+
+	cycle := graph.FindCycle("2")
+	expected := [...]string{"2", "3", "1"}
+	assert.Equal(t, expected[:], cycle)
+	cycle = graph.FindCycle("5")
+	assert.Len(t, cycle, 0)
+}
+
+func TestToposortFindParents(t *testing.T) {
+	graph := NewGraph()
+	graph.AddNodes("1", "2", "3", "4", "5")
+
+	graph.AddEdge("1", "2")
+	graph.AddEdge("2", "3")
+	graph.AddEdge("2", "4")
+	graph.AddEdge("3", "1")
+	graph.AddEdge("5", "1")
+
+	parents := graph.FindParents("2")
+	expected := [...]string{"1"}
+	assert.Equal(t, expected[:], parents)
+	parents = graph.FindParents("1")
+	assert.Len(t, parents, 2)
+	checks := [2]bool{}
+	for _, p := range parents {
+		if p == "3" {
+			checks[0] = true
+		} else if p == "5" {
+			checks[1] = true
+		}
+	}
+	assert.Equal(t, [2]bool{true, true}, checks)
+}
+
+func TestToposortFindChildren(t *testing.T) {
+	graph := NewGraph()
+	graph.AddNodes("1", "2", "3", "4", "5")
+
+	graph.AddEdge("1", "2")
+	graph.AddEdge("2", "3")
+	graph.AddEdge("2", "4")
+	graph.AddEdge("3", "1")
+	graph.AddEdge("5", "1")
+
+	children := graph.FindChildren("1")
+	expected := [...]string{"2"}
+	assert.Equal(t, expected[:], children)
+	children = graph.FindChildren("2")
+	assert.Len(t, children, 2)
+	checks := [2]bool{}
+	for _, p := range children {
+		if p == "3" {
+			checks[0] = true
+		} else if p == "4" {
+			checks[1] = true
+		}
+	}
+	assert.Equal(t, [2]bool{true, true}, checks)
+}
+
+func TestToposortSerialize(t *testing.T) {
+	graph := NewGraph()
+	graph.AddNodes("1", "2", "3", "4", "5")
+
+	graph.AddEdge("1", "2")
+	graph.AddEdge("2", "3")
+	graph.AddEdge("2", "4")
+	graph.AddEdge("3", "1")
+	graph.AddEdge("5", "1")
+
+	order := [...]string{"5", "4", "3", "2", "1"}
+	gv := graph.Serialize(order[:])
+	assert.Equal(t, `digraph Hercules {
+  "4 1" -> "3 2"
+  "3 2" -> "2 3"
+  "3 2" -> "1 4"
+  "2 3" -> "4 1"
+  "0 5" -> "4 1"
+}`, gv)
+}

+ 8 - 2
tree_diff.go

@@ -24,6 +24,12 @@ func (treediff *TreeDiff) Requires() []string {
 	return []string{}
 }
 
+func (treediff *TreeDiff) ListConfigurationOptions() []ConfigurationOption {
+	return []ConfigurationOption{}
+}
+
+func (treediff *TreeDiff) Configure(facts map[string]interface{}) {}
+
 func (treediff *TreeDiff) Initialize(repository *git.Repository) {
 	treediff.previousTree = nil
 }
@@ -67,6 +73,6 @@ func (treediff *TreeDiff) Consume(deps map[string]interface{}) (map[string]inter
 	return map[string]interface{}{"changes": diff}, nil
 }
 
-func (treediff *TreeDiff) Finalize() interface{} {
-	return nil
+func init() {
+	Registry.Register(&TreeDiff{})
 }

+ 17 - 6
tree_diff_test.go

@@ -11,6 +11,7 @@ import (
 
 func fixtureTreeDiff() *TreeDiff {
 	td := TreeDiff{}
+	td.Configure(nil)
 	td.Initialize(testRepository)
 	return &td
 }
@@ -21,6 +22,22 @@ func TestTreeDiffMeta(t *testing.T) {
 	assert.Equal(t, len(td.Requires()), 0)
 	assert.Equal(t, len(td.Provides()), 1)
 	assert.Equal(t, td.Provides()[0], "changes")
+	opts := td.ListConfigurationOptions()
+	assert.Len(t, opts, 0)
+}
+
+func TestTreeDiffRegistration(t *testing.T) {
+	tp, exists := Registry.registered[(&TreeDiff{}).Name()]
+	assert.True(t, exists)
+	assert.Equal(t, tp.Elem().Name(), "TreeDiff")
+	tps, exists := Registry.provided[(&TreeDiff{}).Provides()[0]]
+	assert.True(t, exists)
+	assert.True(t, len(tps) >= 1)
+	matched := false
+	for _, tp := range tps {
+		matched = matched || tp.Elem().Name() == "TreeDiff"
+	}
+	assert.True(t, matched)
 }
 
 func TestTreeDiffConsume(t *testing.T) {
@@ -81,9 +98,3 @@ func TestTreeDiffConsumeFirst(t *testing.T) {
 		assert.Equal(t, action, merkletrie.Insert)
 	}
 }
-
-func TestTreeDiffFinalize(t *testing.T) {
-	id := fixtureTreeDiff()
-	res := id.Finalize()
-	assert.Nil(t, res)
-}

+ 497 - 0
uast.go

@@ -0,0 +1,497 @@
+package hercules
+
+import (
+	"bytes"
+	"context"
+	"errors"
+	"fmt"
+	"io"
+	goioutil "io/ioutil"
+	"os"
+	"runtime"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/gogo/protobuf/proto"
+	"github.com/jeffail/tunny"
+	"gopkg.in/bblfsh/client-go.v2"
+	"gopkg.in/bblfsh/sdk.v1/protocol"
+	"gopkg.in/bblfsh/sdk.v1/uast"
+	"gopkg.in/src-d/enry.v1"
+	"gopkg.in/src-d/go-git.v4"
+	"gopkg.in/src-d/go-git.v4/plumbing"
+	"gopkg.in/src-d/go-git.v4/plumbing/object"
+	"gopkg.in/src-d/go-git.v4/utils/ioutil"
+	"gopkg.in/src-d/go-git.v4/utils/merkletrie"
+	"gopkg.in/src-d/hercules.v3/pb"
+)
+
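+// UASTExtractor parses a UAST via Babelfish for every file changed in each commit.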
+type UASTExtractor struct {
+	Endpoint       string
+	Context        func() context.Context
+	PoolSize       int
+	Languages      map[string]bool
+	FailOnErrors   bool
+	ProcessedFiles map[string]int
+
+	clients []*bblfsh.Client
+	pool    *tunny.WorkPool
+}
+
+const (
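+	// UAST_EXTRACTION_SKIPPED marks a file whose language is not in the enabled set.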
+	UAST_EXTRACTION_SKIPPED = -(1 << 31)
+
+	ConfigUASTEndpoint     = "ConfigUASTEndpoint"
+	ConfigUASTTimeout      = "ConfigUASTTimeout"
+	ConfigUASTPoolSize     = "ConfigUASTPoolSize"
+	ConfigUASTFailOnErrors = "ConfigUASTFailOnErrors"
+	ConfigUASTLanguages    = "ConfigUASTLanguages"
+)
+
+type uastTask struct {
+	Client *bblfsh.Client
+	Lock   *sync.RWMutex
+	Dest   map[plumbing.Hash]*uast.Node
+	File   *object.File
+	Errors *[]error
+	Status chan int
+}
+
+type worker struct {
+	Client *bblfsh.Client
+	Job    func(interface{}) interface{}
+}
+
+func (w worker) TunnyReady() bool {
+	return true
+}
+
+func (w worker) TunnyJob(data interface{}) interface{} {
+	task := data.(uastTask)
+	task.Client = w.Client
+	return w.Job(task)
+}
+
+func (exr *UASTExtractor) Name() string {
+	return "UAST"
+}
+
+func (exr *UASTExtractor) Provides() []string {
+	arr := [...]string{"uasts"}
+	return arr[:]
+}
+
+func (exr *UASTExtractor) Requires() []string {
+	arr := [...]string{"changes", "blob_cache"}
+	return arr[:]
+}
+
+func (exr *UASTExtractor) Features() []string {
+	arr := [...]string{"uast"}
+	return arr[:]
+}
+
+func (exr *UASTExtractor) ListConfigurationOptions() []ConfigurationOption {
+	options := [...]ConfigurationOption{{
+		Name:        ConfigUASTEndpoint,
+		Description: "How many days there are in a single band.",
+		Flag:        "bblfsh",
+		Type:        StringConfigurationOption,
+		Default:     "0.0.0.0:9432"}, {
+		Name:        ConfigUASTTimeout,
+		Description: "Babelfish's server timeout in seconds.",
+		Flag:        "bblfsh-timeout",
+		Type:        IntConfigurationOption,
+		Default:     20}, {
+		Name:        ConfigUASTPoolSize,
+		Description: "Number of goroutines to extract UASTs.",
+		Flag:        "bblfsh-pool-size",
+		Type:        IntConfigurationOption,
+		Default:     runtime.NumCPU()}, {
+		Name:        ConfigUASTFailOnErrors,
+		Description: "Panic if there is a UAST extraction error.",
+		Flag:        "bblfsh-fail-on-error",
+		Type:        BoolConfigurationOption,
+		Default:     false}, {
+		Name:        ConfigUASTLanguages,
+		Description: "Programming languages from which to extract UASTs. Separated by comma \",\".",
+		Flag:        "languages",
+		Type:        StringConfigurationOption,
+		Default:     "Python,Java"},
+	}
+	return options[:]
+}
+
+func (exr *UASTExtractor) Configure(facts map[string]interface{}) {
+	if val, exists := facts[ConfigUASTEndpoint].(string); exists {
+		exr.Endpoint = val
+	}
+	if val, exists := facts[ConfigUASTTimeout].(int); exists {
+		exr.Context = func() context.Context {
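+			// The cancel function is discarded on purpose: the returned context
+			// must outlive this factory, and the timer releases itself on expiry.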
+			ctx, _ := context.WithTimeout(context.Background(),
+				time.Duration(val)*time.Second)
+			return ctx
+		}
+	}
+	if val, exists := facts[ConfigUASTPoolSize].(int); exists {
+		exr.PoolSize = val
+	}
+	if val, exists := facts[ConfigUASTLanguages].(string); exists {
+		exr.Languages = map[string]bool{}
+		for _, lang := range strings.Split(val, ",") {
+			exr.Languages[strings.TrimSpace(lang)] = true
+		}
+	}
+	if val, exists := facts[ConfigUASTFailOnErrors].(bool); exists {
+		exr.FailOnErrors = val
+	}
+}
+
+func (exr *UASTExtractor) Initialize(repository *git.Repository) {
+	if exr.Context == nil {
+		exr.Context = func() context.Context { return context.Background() }
+	}
+	poolSize := exr.PoolSize
+	if poolSize == 0 {
+		poolSize = runtime.NumCPU()
+	}
+	var err error
+	exr.clients = make([]*bblfsh.Client, poolSize)
+	for i := 0; i < poolSize; i++ {
+		client, err := bblfsh.NewClient(exr.Endpoint)
+		if err != nil {
+			panic(err)
+		}
+		exr.clients[i] = client
+	}
+	if exr.pool != nil {
+		exr.pool.Close()
+	}
+	workers := make([]tunny.TunnyWorker, poolSize)
+	for i := 0; i < poolSize; i++ {
+		workers[i] = worker{Client: exr.clients[i], Job: exr.extractTask}
+	}
+	exr.pool, err = tunny.CreateCustomPool(workers).Open()
+	if err != nil {
+		panic(err)
+	}
+	exr.ProcessedFiles = map[string]int{}
+	if exr.Languages == nil {
+		exr.Languages = map[string]bool{}
+	}
+}
+
+func (exr *UASTExtractor) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
+	cache := deps["blob_cache"].(map[plumbing.Hash]*object.Blob)
+	treeDiffs := deps["changes"].(object.Changes)
+	uasts := map[plumbing.Hash]*uast.Node{}
+	lock := sync.RWMutex{}
+	errs := make([]error, 0)
+	status := make(chan int)
+	pending := 0
+	submit := func(change *object.Change) {
+		{
+			reader, err := cache[change.To.TreeEntry.Hash].Reader()
+			if err != nil {
+				errs = append(errs, err)
+				return
+			}
+			defer ioutil.CheckClose(reader, &err)
+
+			buf := new(bytes.Buffer)
+			if _, err := buf.ReadFrom(reader); err != nil {
+				errs = append(errs, err)
+				return
+			}
+			lang := enry.GetLanguage(change.To.Name, buf.Bytes())
+			if _, exists := exr.Languages[lang]; !exists {
+				exr.ProcessedFiles[change.To.Name] = UAST_EXTRACTION_SKIPPED
+				return
+			}
+			exr.ProcessedFiles[change.To.Name]++
+		}
+		pending++
+		exr.pool.SendWorkAsync(uastTask{
+			Lock:   &lock,
+			Dest:   uasts,
+			File:   &object.File{Name: change.To.Name, Blob: *cache[change.To.TreeEntry.Hash]},
+			Errors: &errs, Status: status}, nil)
+	}
+	for _, change := range treeDiffs {
+		action, err := change.Action()
+		if err != nil {
+			return nil, err
+		}
+		switch action {
+		case merkletrie.Insert:
+			submit(change)
+		case merkletrie.Delete:
+			continue
+		case merkletrie.Modify:
+			submit(change)
+		}
+	}
+	for i := 0; i < pending; i++ {
+		_ = <-status
+	}
+	if len(errs) > 0 {
+		msgs := make([]string, len(errs))
+		for i, err := range errs {
+			msgs[i] = err.Error()
+		}
+		joined := strings.Join(msgs, "\n")
+		if exr.FailOnErrors {
+			return nil, errors.New(joined)
+		}
+		fmt.Fprintln(os.Stderr, joined)
+	}
+	return map[string]interface{}{"uasts": uasts}, nil
+}
+
+func (exr *UASTExtractor) extractUAST(
+	client *bblfsh.Client, file *object.File) (*uast.Node, error) {
+	request := client.NewParseRequest()
+	contents, err := file.Contents()
+	if err != nil {
+		return nil, err
+	}
+	request.Content(contents)
+	request.Filename(file.Name)
+	response, err := request.DoWithContext(exr.Context())
+	if err != nil {
+		if strings.Contains("missing driver", err.Error()) {
+			return nil, nil
+		}
+		return nil, err
+	}
+	if response.Status != protocol.Ok {
+		return nil, errors.New(strings.Join(response.Errors, "\n"))
+	}
+	return response.UAST, nil
+}
+
+func (exr *UASTExtractor) extractTask(data interface{}) interface{} {
+	task := data.(uastTask)
+	defer func() { task.Status <- 0 }()
+	node, err := exr.extractUAST(task.Client, task.File)
+	task.Lock.Lock()
+	defer task.Lock.Unlock()
+	if err != nil {
+		*task.Errors = append(*task.Errors, errors.New(task.File.Name+": "+err.Error()))
+		return nil
+	}
+	if node != nil {
+		task.Dest[task.File.Hash] = node
+	}
+	return nil
+}
+
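+// UASTChange is a single file change with the corresponding before and after UASTs.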
+type UASTChange struct {
+	Before *uast.Node
+	After  *uast.Node
+	Change *object.Change
+}
+
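+// UASTChanges pairs the freshly extracted UASTs with the cached previous revisions.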
+type UASTChanges struct {
+	cache map[plumbing.Hash]*uast.Node
+}
+
+func (uc *UASTChanges) Name() string {
+	return "UASTChanges"
+}
+
+func (uc *UASTChanges) Provides() []string {
+	arr := [...]string{"changed_uasts"}
+	return arr[:]
+}
+
+func (uc *UASTChanges) Requires() []string {
+	arr := [...]string{"uasts", "changes"}
+	return arr[:]
+}
+
+func (uc *UASTChanges) Features() []string {
+	arr := [...]string{"uast"}
+	return arr[:]
+}
+
+func (uc *UASTChanges) ListConfigurationOptions() []ConfigurationOption {
+	return []ConfigurationOption{}
+}
+
+func (uc *UASTChanges) Configure(facts map[string]interface{}) {}
+
+func (uc *UASTChanges) Initialize(repository *git.Repository) {
+	uc.cache = map[plumbing.Hash]*uast.Node{}
+}
+
+func (uc *UASTChanges) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
+	uasts := deps["uasts"].(map[plumbing.Hash]*uast.Node)
+	treeDiffs := deps["changes"].(object.Changes)
+	commit := make([]UASTChange, 0, len(treeDiffs))
+	for _, change := range treeDiffs {
+		action, err := change.Action()
+		if err != nil {
+			return nil, err
+		}
+		switch action {
+		case merkletrie.Insert:
+			hashTo := change.To.TreeEntry.Hash
+			uastTo := uasts[hashTo]
+			commit = append(commit, UASTChange{Before: nil, After: uastTo, Change: change})
+			uc.cache[hashTo] = uastTo
+		case merkletrie.Delete:
+			hashFrom := change.From.TreeEntry.Hash
+			commit = append(commit, UASTChange{Before: uc.cache[hashFrom], After: nil, Change: change})
+			delete(uc.cache, hashFrom)
+		case merkletrie.Modify:
+			hashFrom := change.From.TreeEntry.Hash
+			hashTo := change.To.TreeEntry.Hash
+			uastTo := uasts[hashTo]
+			commit = append(commit, UASTChange{Before: uc.cache[hashFrom], After: uastTo, Change: change})
+			delete(uc.cache, hashFrom)
+			uc.cache[hashTo] = uastTo
+		}
+	}
+	return map[string]interface{}{"changed_uasts": commit}, nil
+}
+
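+// UASTChangesSaver dumps the changed files and their UASTs to disk, one pair of
+// (source, UAST) files per revision side.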
+type UASTChangesSaver struct {
+	// OutputPath points to the target directory with UASTs
+	OutputPath string
+
+	repository *git.Repository
+	result     [][]UASTChange
+}
+
+const (
+	ConfigUASTChangesSaverOutputPath = "UASTChangesSaver.OutputPath"
+)
+
+func (saver *UASTChangesSaver) Name() string {
+	return "UASTChangesSaver"
+}
+
+func (saver *UASTChangesSaver) Provides() []string {
+	return []string{}
+}
+
+func (saver *UASTChangesSaver) Requires() []string {
+	arr := [...]string{"changed_uasts"}
+	return arr[:]
+}
+
+func (saver *UASTChangesSaver) Features() []string {
+	arr := [...]string{"uast"}
+	return arr[:]
+}
+
+func (saver *UASTChangesSaver) ListConfigurationOptions() []ConfigurationOption {
+	options := [...]ConfigurationOption{{
+		Name:        ConfigUASTChangesSaverOutputPath,
+		Description: "The target directory where to store the changed UAST files.",
+		Flag:        "changed-uast-dir",
+		Type:        StringConfigurationOption,
+		Default:     "."},
+	}
+	return options[:]
+}
+
+func (saver *UASTChangesSaver) Flag() string {
+	return "dump-uast-changes"
+}
+
+func (saver *UASTChangesSaver) Configure(facts map[string]interface{}) {
+	if val, exists := facts[ConfigUASTChangesSaverOutputPath]; exists {
+		saver.OutputPath = val.(string)
+	}
+}
+
+func (saver *UASTChangesSaver) Initialize(repository *git.Repository) {
+	saver.repository = repository
+	saver.result = [][]UASTChange{}
+}
+
+func (saver *UASTChangesSaver) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
+	changes := deps["changed_uasts"].([]UASTChange)
+	saver.result = append(saver.result, changes)
+	return nil, nil
+}
+
+func (saver *UASTChangesSaver) Finalize() interface{} {
+	return saver.result
+}
+
+func (saver *UASTChangesSaver) Serialize(result interface{}, binary bool, writer io.Writer) error {
+	saverResult := result.([][]UASTChange)
+	fileNames := saver.dumpFiles(saverResult)
+	if binary {
+		return saver.serializeBinary(fileNames, writer)
+	}
+	saver.serializeText(fileNames, writer)
+	return nil
+}
+
+func (saver *UASTChangesSaver) dumpFiles(result [][]UASTChange) []*pb.UASTChange {
+	fileNames := []*pb.UASTChange{}
+	for i, changes := range result {
+		for j, change := range changes {
+			if change.Before == nil || change.After == nil {
+				continue
+			}
+			record := &pb.UASTChange{FileName: change.Change.To.Name}
+			bs, _ := change.Before.Marshal()
+			record.UastBefore = path.Join(saver.OutputPath, fmt.Sprintf(
+				"%d_%d_before_%s.pb", i, j, change.Change.From.TreeEntry.Hash.String()))
+			goioutil.WriteFile(record.UastBefore, bs, 0666)
+			blob, _ := saver.repository.BlobObject(change.Change.From.TreeEntry.Hash)
+			s, _ := (&object.File{Blob: *blob}).Contents()
+			record.SrcBefore = path.Join(saver.OutputPath, fmt.Sprintf(
+				"%d_%d_before_%s.src", i, j, change.Change.From.TreeEntry.Hash.String()))
+			goioutil.WriteFile(record.SrcBefore, []byte(s), 0666)
+			bs, _ = change.After.Marshal()
+			record.UastAfter = path.Join(saver.OutputPath, fmt.Sprintf(
+				"%d_%d_after_%s.pb", i, j, change.Change.To.TreeEntry.Hash.String()))
+			goioutil.WriteFile(record.UastAfter, bs, 0666)
+			blob, _ = saver.repository.BlobObject(change.Change.To.TreeEntry.Hash)
+			s, _ = (&object.File{Blob: *blob}).Contents()
+			record.SrcAfter = path.Join(saver.OutputPath, fmt.Sprintf(
+				"%d_%d_after_%s.src", i, j, change.Change.To.TreeEntry.Hash.String()))
+			goioutil.WriteFile(record.SrcAfter, []byte(s), 0666)
+			fileNames = append(fileNames, record)
+		}
+	}
+	return fileNames
+}
+
+func (saver *UASTChangesSaver) serializeText(result []*pb.UASTChange, writer io.Writer) {
+	for _, sc := range result {
+		kv := [...]string{
+			"file: " + sc.FileName,
+			"src0: " + sc.SrcBefore, "src1: " + sc.SrcAfter,
+			"uast0: " + sc.UastBefore, "uast1: " + sc.UastAfter,
+		}
+		fmt.Fprintf(writer, "  - {%s}\n", strings.Join(kv[:], ", "))
+	}
+}
+
+func (saver *UASTChangesSaver) serializeBinary(result []*pb.UASTChange, writer io.Writer) error {
+	message := pb.UASTChangesSaverResults{Changes: result}
+	serialized, err := proto.Marshal(&message)
+	if err != nil {
+		return err
+	}
+	_, err = writer.Write(serialized)
+	return err
+}
+
+func init() {
+	Registry.Register(&UASTExtractor{})
+	Registry.Register(&UASTChanges{})
+	Registry.Register(&UASTChangesSaver{})
+}

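Note: UASTExtractor above drives a Babelfish server through client-go. A standalone sketch of the same request flow, restricted to the calls used in this file (NewClient, NewParseRequest, Content, Filename, DoWithContext), might look as follows; the endpoint and source snippet are illustrative and assume a running bblfshd with a Python driver installed.

package main

import (
	"context"
	"fmt"

	"gopkg.in/bblfsh/client-go.v2"
	"gopkg.in/bblfsh/sdk.v1/protocol"
)

func main() {
	// Illustrative endpoint; a bblfshd instance must be listening there.
	client, err := bblfsh.NewClient("0.0.0.0:9432")
	if err != nil {
		panic(err)
	}
	request := client.NewParseRequest()
	request.Content("def answer():\n    return 42\n")
	request.Filename("answer.py")
	response, err := request.DoWithContext(context.Background())
	if err != nil {
		panic(err)
	}
	if response.Status != protocol.Ok {
		panic(response.Errors)
	}
	// The UAST root node; its children correspond to top-level constructs.
	fmt.Println(response.UAST.InternalType, len(response.UAST.Children))
}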
+ 301 - 0
uast_test.go

@@ -0,0 +1,301 @@
+package hercules
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"gopkg.in/bblfsh/sdk.v1/uast"
+	"gopkg.in/src-d/go-git.v4/plumbing"
+	"gopkg.in/src-d/go-git.v4/plumbing/object"
+)
+
+func fixtureUASTExtractor() *UASTExtractor {
+	exr := UASTExtractor{Endpoint: "0.0.0.0:9432"}
+	exr.Initialize(testRepository)
+	exr.Languages["Python"] = true
+	return &exr
+}
+
+func TestUASTExtractorMeta(t *testing.T) {
+	exr := fixtureUASTExtractor()
+	assert.Equal(t, exr.Name(), "UAST")
+	assert.Equal(t, len(exr.Provides()), 1)
+	assert.Equal(t, exr.Provides()[0], "uasts")
+	assert.Equal(t, len(exr.Requires()), 2)
+	assert.Equal(t, exr.Requires()[0], "changes")
+	assert.Equal(t, exr.Requires()[1], "blob_cache")
+	opts := exr.ListConfigurationOptions()
+	assert.Len(t, opts, 5)
+	assert.Equal(t, opts[0].Name, ConfigUASTEndpoint)
+	assert.Equal(t, opts[1].Name, ConfigUASTTimeout)
+	assert.Equal(t, opts[2].Name, ConfigUASTPoolSize)
+	assert.Equal(t, opts[3].Name, ConfigUASTFailOnErrors)
+	assert.Equal(t, opts[4].Name, ConfigUASTLanguages)
+	feats := exr.Features()
+	assert.Len(t, feats, 1)
+	assert.Equal(t, feats[0], "uast")
+}
+
+func TestUASTExtractorConfiguration(t *testing.T) {
+	exr := fixtureUASTExtractor()
+	facts := map[string]interface{}{}
+	exr.Configure(facts)
+	facts[ConfigUASTEndpoint] = "localhost:9432"
+	facts[ConfigUASTTimeout] = 15
+	facts[ConfigUASTPoolSize] = 7
+	facts[ConfigUASTLanguages] = "C, Go"
+	facts[ConfigUASTFailOnErrors] = true
+	exr.Configure(facts)
+	assert.Equal(t, exr.Endpoint, facts[ConfigUASTEndpoint])
+	assert.NotNil(t, exr.Context)
+	assert.Equal(t, exr.PoolSize, facts[ConfigUASTPoolSize])
+	assert.True(t, exr.Languages["C"])
+	assert.True(t, exr.Languages["Go"])
+	assert.False(t, exr.Languages["Python"])
+	assert.Equal(t, exr.FailOnErrors, true)
+}
+
+func TestUASTExtractorRegistration(t *testing.T) {
+	tp, exists := Registry.registered[(&UASTExtractor{}).Name()]
+	assert.True(t, exists)
+	assert.Equal(t, tp.Elem().Name(), "UASTExtractor")
+	tps, exists := Registry.provided[(&UASTExtractor{}).Provides()[0]]
+	assert.True(t, exists)
+	assert.Len(t, tps, 1)
+	assert.Equal(t, tps[0].Elem().Name(), "UASTExtractor")
+}
+
+func TestUASTExtractorConsume(t *testing.T) {
+	exr := fixtureUASTExtractor()
+	changes := make(object.Changes, 2)
+	// 2b1ed978194a94edeabbca6de7ff3b5771d4d665
+	treeFrom, _ := testRepository.TreeObject(plumbing.NewHash(
+		"96c6ece9b2f3c7c51b83516400d278dea5605100"))
+	treeTo, _ := testRepository.TreeObject(plumbing.NewHash(
+		"251f2094d7b523d5bcc60e663b6cf38151bf8844"))
+	changes[0] = &object.Change{From: object.ChangeEntry{
+		Name: "analyser.go",
+		Tree: treeFrom,
+		TreeEntry: object.TreeEntry{
+			Name: "analyser.go",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("baa64828831d174f40140e4b3cfa77d1e917a2c1"),
+		},
+	}, To: object.ChangeEntry{},
+	}
+	changes[1] = &object.Change{From: object.ChangeEntry{
+		Name: "cmd/hercules/main.go",
+		Tree: treeFrom,
+		TreeEntry: object.TreeEntry{
+			Name: "cmd/hercules/main.go",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("c29112dbd697ad9b401333b80c18a63951bc18d9"),
+		},
+	}, To: object.ChangeEntry{
+		Name: "cmd/hercules/main.go",
+		Tree: treeTo,
+		TreeEntry: object.TreeEntry{
+			Name: "cmd/hercules/main.go",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("f7d918ec500e2f925ecde79b51cc007bac27de72"),
+		},
+	},
+	}
+	cache := map[plumbing.Hash]*object.Blob{}
+	hash := plumbing.NewHash("baa64828831d174f40140e4b3cfa77d1e917a2c1")
+	cache[hash], _ = testRepository.BlobObject(hash)
+	hash = plumbing.NewHash("5d78f57d732aed825764347ec6f3ab74d50d0619")
+	cache[hash], _ = testRepository.BlobObject(hash)
+	hash = plumbing.NewHash("c29112dbd697ad9b401333b80c18a63951bc18d9")
+	cache[hash], _ = testRepository.BlobObject(hash)
+	hash = plumbing.NewHash("f7d918ec500e2f925ecde79b51cc007bac27de72")
+	cache[hash], _ = testRepository.BlobObject(hash)
+	deps := map[string]interface{}{}
+	deps["blob_cache"] = cache
+	deps["changes"] = changes
+	res, err := exr.Consume(deps)
+	// Language not enabled
+	assert.Len(t, res["uasts"], 0)
+	assert.Nil(t, err)
+	exr.Languages["Go"] = true
+	res, err = exr.Consume(deps)
+	// No Go driver
+	assert.Len(t, res["uasts"], 0)
+	assert.Nil(t, err)
+
+	hash = plumbing.NewHash("5d78f57d732aed825764347ec6f3ab74d50d0619")
+	changes[1] = &object.Change{From: object.ChangeEntry{}, To: object.ChangeEntry{
+		Name: "labours.py",
+		Tree: treeTo,
+		TreeEntry: object.TreeEntry{
+			Name: "labours.py",
+			Mode: 0100644,
+			Hash: hash,
+		},
+	},
+	}
+
+	res, err = exr.Consume(deps)
+	assert.Nil(t, err)
+	uasts := res["uasts"].(map[plumbing.Hash]*uast.Node)
+	assert.Equal(t, len(uasts), 1)
+	assert.Equal(t, len(uasts[hash].Children), 24)
+}
+
+func fixtureUASTChanges() *UASTChanges {
+	ch := UASTChanges{}
+	ch.Configure(nil)
+	ch.Initialize(testRepository)
+	return &ch
+}
+
+func TestUASTChangesMeta(t *testing.T) {
+	ch := fixtureUASTChanges()
+	assert.Equal(t, ch.Name(), "UASTChanges")
+	assert.Equal(t, len(ch.Provides()), 1)
+	assert.Equal(t, ch.Provides()[0], "changed_uasts")
+	assert.Equal(t, len(ch.Requires()), 2)
+	assert.Equal(t, ch.Requires()[0], "uasts")
+	assert.Equal(t, ch.Requires()[1], "changes")
+	opts := ch.ListConfigurationOptions()
+	assert.Len(t, opts, 0)
+	feats := ch.Features()
+	assert.Len(t, feats, 1)
+	assert.Equal(t, feats[0], "uast")
+}
+
+func TestUASTChangesRegistration(t *testing.T) {
+	tp, exists := Registry.registered[(&UASTChanges{}).Name()]
+	assert.True(t, exists)
+	assert.Equal(t, tp.Elem().Name(), "UASTChanges")
+	tps, exists := Registry.provided[(&UASTChanges{}).Provides()[0]]
+	assert.True(t, exists)
+	assert.True(t, len(tps) >= 1)
+	matched := false
+	for _, tp := range tps {
+		matched = matched || tp.Elem().Name() == "UASTChanges"
+	}
+	assert.True(t, matched)
+}
+
+func TestUASTChangesConsume(t *testing.T) {
+	uastsArray := []*uast.Node{}
+	uasts := map[plumbing.Hash]*uast.Node{}
+	hash := plumbing.NewHash("291286b4ac41952cbd1389fda66420ec03c1a9fe")
+	uasts[hash] = &uast.Node{}
+	uasts[hash].InternalType = "uno"
+	uastsArray = append(uastsArray, uasts[hash])
+	hash = plumbing.NewHash("c29112dbd697ad9b401333b80c18a63951bc18d9")
+	uasts[hash] = &uast.Node{}
+	uasts[hash].InternalType = "dos"
+	uastsArray = append(uastsArray, uasts[hash])
+	hash = plumbing.NewHash("baa64828831d174f40140e4b3cfa77d1e917a2c1")
+	uasts[hash] = &uast.Node{}
+	uasts[hash].InternalType = "tres"
+	uastsArray = append(uastsArray, uasts[hash])
+	hash = plumbing.NewHash("dc248ba2b22048cc730c571a748e8ffcf7085ab9")
+	uasts[hash] = &uast.Node{}
+	uasts[hash].InternalType = "quatro"
+	uastsArray = append(uastsArray, uasts[hash])
+	changes := make(object.Changes, 3)
+	treeFrom, _ := testRepository.TreeObject(plumbing.NewHash(
+		"a1eb2ea76eb7f9bfbde9b243861474421000eb96"))
+	treeTo, _ := testRepository.TreeObject(plumbing.NewHash(
+		"994eac1cd07235bb9815e547a75c84265dea00f5"))
+	changes[0] = &object.Change{From: object.ChangeEntry{
+		Name: "analyser.go",
+		Tree: treeFrom,
+		TreeEntry: object.TreeEntry{
+			Name: "analyser.go",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("dc248ba2b22048cc730c571a748e8ffcf7085ab9"),
+		},
+	}, To: object.ChangeEntry{
+		Name: "analyser.go",
+		Tree: treeTo,
+		TreeEntry: object.TreeEntry{
+			Name: "analyser.go",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("baa64828831d174f40140e4b3cfa77d1e917a2c1"),
+		},
+	}}
+	changes[1] = &object.Change{From: object.ChangeEntry{}, To: object.ChangeEntry{
+		Name: "cmd/hercules/main.go",
+		Tree: treeTo,
+		TreeEntry: object.TreeEntry{
+			Name: "cmd/hercules/main.go",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("c29112dbd697ad9b401333b80c18a63951bc18d9"),
+		},
+	},
+	}
+	changes[2] = &object.Change{To: object.ChangeEntry{}, From: object.ChangeEntry{
+		Name: ".travis.yml",
+		Tree: treeTo,
+		TreeEntry: object.TreeEntry{
+			Name: ".travis.yml",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("291286b4ac41952cbd1389fda66420ec03c1a9fe"),
+		},
+	},
+	}
+	deps := map[string]interface{}{}
+	deps["uasts"] = uasts
+	deps["changes"] = changes
+	ch := fixtureUASTChanges()
+	ch.cache[changes[0].From.TreeEntry.Hash] = uastsArray[3]
+	ch.cache[changes[2].From.TreeEntry.Hash] = uastsArray[0]
+	resultMap, err := ch.Consume(deps)
+	assert.Nil(t, err)
+	result := resultMap["changed_uasts"].([]UASTChange)
+	assert.Len(t, result, 3)
+	assert.Equal(t, result[0].Change, changes[0])
+	assert.Equal(t, result[0].Before, uastsArray[3])
+	assert.Equal(t, result[0].After, uastsArray[2])
+	assert.Equal(t, result[1].Change, changes[1])
+	assert.Nil(t, result[1].Before)
+	assert.Equal(t, result[1].After, uastsArray[1])
+	assert.Equal(t, result[2].Change, changes[2])
+	assert.Equal(t, result[2].Before, uastsArray[0])
+	assert.Nil(t, result[2].After)
+}
+
+func fixtureUASTChangesSaver() *UASTChangesSaver {
+	ch := UASTChangesSaver{}
+	ch.Initialize(testRepository)
+	return &ch
+}
+
+func TestUASTChangesSaverMeta(t *testing.T) {
+	ch := fixtureUASTChangesSaver()
+	assert.Equal(t, ch.Name(), "UASTChangesSaver")
+	assert.Equal(t, len(ch.Provides()), 0)
+	assert.Equal(t, len(ch.Requires()), 1)
+	assert.Equal(t, ch.Requires()[0], "changed_uasts")
+	opts := ch.ListConfigurationOptions()
+	assert.Len(t, opts, 1)
+	assert.Equal(t, opts[0].Name, ConfigUASTChangesSaverOutputPath)
+	feats := ch.Features()
+	assert.Len(t, feats, 1)
+	assert.Equal(t, feats[0], "uast")
+	assert.Equal(t, ch.Flag(), "dump-uast-changes")
+}
+
+func TestUASTChangesSaverConfiguration(t *testing.T) {
+	facts := map[string]interface{}{}
+	ch := fixtureUASTChangesSaver()
+	ch.Configure(facts)
+	assert.Empty(t, ch.OutputPath)
+	facts[ConfigUASTChangesSaverOutputPath] = "libre"
+	ch.Configure(facts)
+	assert.Equal(t, ch.OutputPath, "libre")
+}
+
+func TestUASTChangesSaverRegistration(t *testing.T) {
+	tp, exists := Registry.registered[(&UASTChangesSaver{}).Name()]
+	assert.True(t, exists)
+	assert.Equal(t, tp.Elem().Name(), "UASTChangesSaver")
+	tp, exists = Registry.flags[(&UASTChangesSaver{}).Flag()]
+	assert.True(t, exists)
+	assert.Equal(t, tp.Elem().Name(), "UASTChangesSaver")
+}

+ 3 - 0
version.go

@@ -0,0 +1,3 @@
+package hercules
+
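+// GIT_HASH is the build-time commit hash; it is meant to be overridden by the
+// linker, e.g. -ldflags "-X gopkg.in/src-d/hercules.v3.GIT_HASH=<commit>".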
+var GIT_HASH = "<unknown>"