Sfoglia il codice sorgente

Merge pull request #27 from vmarkovtsev/master

Add the churn plugin example
Vadim Markovtsev 7 anni fa
parent
commit
b7bbf5012e

+ 2 - 0
.gitignore

@@ -1,4 +1,6 @@
 cmd/hercules-generate-plugin/plugin_template_source.go
+contrib/_plugin_example/churn_analysis.pb.go
+pb/pb.pb.go
 
 **/.DS_Store
 .idea

+ 3 - 2
.travis.yml

@@ -39,13 +39,14 @@ install:
   - docker exec -it bblfshd bblfshctl driver install --all
   
 script:
-  - go vet .
+  - go vet ./...
   - go test -v -cpu=1,2 -coverprofile=coverage.txt -covermode=count gopkg.in/src-d/hercules.v3
   - $GOPATH/bin/hercules -version
   - $GOPATH/bin/hercules -burndown -burndown-files -burndown-people -couples -quiet https://github.com/src-d/hercules | python3 labours.py -m all -o out --backend Agg --disable-projector
   - $GOPATH/bin/hercules -burndown -burndown-files -burndown-people -couples -quiet -pb https://github.com/src-d/hercules | python3 labours.py -f pb -m all -o out --backend Agg --disable-projector
   - $GOPATH/bin/hercules-generate-plugin -version
-  - $GOPATH/bin/hercules-generate-plugin -n MyPlug -o myplug && cd myplug && make
+  - $GOPATH/bin/hercules-generate-plugin -n MyPlug -o myplug && cd myplug && make && cd -
+  - cd contrib/_plugin_example && make
 
 after_success:
   - bash <(curl -s https://codecov.io/bash)

+ 10 - 1
PLUGINS.md

@@ -37,4 +37,13 @@ make
 
 ```
 hercules -plugin my_plugin_name.so -my-plugin-name https://github.com/user/repo
-```
+```
+
+### Example
+
+See [contrib/_plugin_example](contrib/_plugin_example). It was generated by `hercules-generate-plugin`
+and implements [code churn](https://blog.gitprime.com/why-code-churn-matters/) analysis through time.
+It uses many Hercules features and supports YAML and protobuf output formats.
+
+![go-git global churn](doc/churn_global.png)
+<p align="center">Generated with <code>hercules -plugin churn_analysis.so -churn https://github.com/src-d/go-git | python3 plot_churn.py --tick-days 14 -</code></p>

+ 7 - 0
contrib/_plugin_example/Makefile

@@ -0,0 +1,7 @@
+# Default target: build the churn analysis plugin shared object.
+all: churn_analysis.so
+
+# Compile the hand-written analysis together with the generated protobuf code as a Go plugin.
+churn_analysis.so: churn_analysis.go churn_analysis.pb.go
+	go build -buildmode=plugin churn_analysis.go churn_analysis.pb.go
+
+# Regenerate the Go protobuf bindings whenever the schema changes. $GOPATH/bin is appended to PATH,
+# presumably so that protoc can locate the gogo generator plugin installed there.
+churn_analysis.pb.go: churn_analysis.proto
+	PATH=$$PATH:$$GOPATH/bin protoc --gogo_out=. --proto_path=. churn_analysis.proto

+ 276 - 0
contrib/_plugin_example/churn_analysis.go

@@ -0,0 +1,276 @@
+package main
+
+import (
+	"fmt"
+	"io"
+	"sort"
+	"strings"
+	"unicode/utf8"
+
+  "gopkg.in/src-d/go-git.v4"
+	"gopkg.in/src-d/go-git.v4/plumbing/object"
+	"gopkg.in/src-d/go-git.v4/utils/merkletrie"
+	"gopkg.in/src-d/go-git.v4/plumbing"
+	"gopkg.in/src-d/hercules.v3"
+	"gopkg.in/src-d/hercules.v3/yaml"
+	"github.com/gogo/protobuf/proto"
+	"github.com/sergi/go-diff/diffmatchpatch"
+)
+
+// ChurnAnalysis contains the intermediate state which is mutated by Consume(). It should implement
+// hercules.LeafPipelineItem.
+type ChurnAnalysis struct {
+	// TrackPeople enables recording the edits separately for each author in addition to the
+	// global statistics. It is set by Configure() from the "Churn.TrackPeople" fact.
+	TrackPeople bool
+
+	// global accumulates one editInfo per changed file of every consumed commit.
+	global []editInfo
+	// people accumulates the same records grouped by the author index; it is filled only when
+	// TrackPeople is true.
+	people map[int][]editInfo
+
+	// references IdentityDetector.ReversedPeopleDict
+	reversedPeopleDict []string
+}
+
+// editInfo records how much a single file change added and removed, together with the day index
+// of the commit which introduced it.
+type editInfo struct {
+	Day int
+	Added int
+	Removed int
+}
+
+// ChurnAnalysisResult is returned by Finalize() and represents the analysis result.
+// Global holds the statistics aggregated over all the authors. People maps an author's name to
+// the statistics of that author and stays empty unless people tracking is enabled.
+type ChurnAnalysisResult struct {
+  Global Edits
+	People map[string]Edits
+}
+
+// Edits is the per-day churn time series. The three slices are parallel: element i of Additions
+// and Removals holds the totals for the day stored in element i of Days. editInfosToEdits()
+// produces Days sorted in ascending order.
+type Edits struct {
+	Days      []int
+	Additions []int
+	Removals  []int
+}
+
+const (
+	// ConfigChurnTrackPeople is the name of the boolean configuration option which switches on
+	// the per-developer statistics. Configure() looks it up in the facts map under this key.
+	ConfigChurnTrackPeople = "Churn.TrackPeople"
+)
+
+// Analysis' name in the graph is usually the same as the type's name, however, does not have to.
+func (churn *ChurnAnalysis) Name() string {
+	return "ChurnAnalysis"
+}
+
+// Provides returns the list of entities produced by this item. LeafPipelineItem-s normally do not
+// act as intermediate nodes, so the returned slice is empty (but not nil).
+func (churn *ChurnAnalysis) Provides() []string {
+	return make([]string, 0)
+}
+
+// Requires returns the names of the dependencies which must be present in the map passed to
+// Consume():
+//   - file_diff: line diff for each commit change
+//   - changes: list of changed files for each commit
+//   - blob_cache: set of blobs affected by each commit
+//   - day: number of days since start for each commit
+//   - author: author of the commit
+func (churn *ChurnAnalysis) Requires() []string {
+	return []string{"file_diff", "changes", "blob_cache", "day", "author"}
+}
+
+// ListConfigurationOptions tells the engine which parameters can be changed through the command
+// line. The only option is the boolean "churn-people" switch, which is off by default.
+func (churn *ChurnAnalysis) ListConfigurationOptions() []hercules.ConfigurationOption {
+	trackPeople := hercules.ConfigurationOption{
+		Name:        ConfigChurnTrackPeople,
+		Description: "Record detailed statistics per each developer.",
+		Flag:        "churn-people",
+		Type:        hercules.BoolConfigurationOption,
+		Default:     false,
+	}
+	return []hercules.ConfigurationOption{trackPeople}
+}
+
+// Flag returns the command line switch which activates the analysis.
+func (churn *ChurnAnalysis) Flag() string {
+	return "churn"
+}
+
+// Configure applies the parameters specified in the command line. Map keys correspond to "Name".
+// When people tracking ends up enabled, the reversed people dictionary is taken from the facts as
+// well; that type assertion is unchecked, so the fact must be present and be a []string.
+func (churn *ChurnAnalysis) Configure(facts map[string]interface{}) {
+	if track, ok := facts[ConfigChurnTrackPeople].(bool); ok {
+		churn.TrackPeople = track
+	}
+	if !churn.TrackPeople {
+		return
+	}
+	churn.reversedPeopleDict = facts[hercules.FactIdentityDetectorReversedPeopleDict].([]string)
+}
+
+// Initialize discards any previously accumulated statistics and gets the object ready for
+// Consume(). The repository argument is not used.
+func (churn *ChurnAnalysis) Initialize(repository *git.Repository) {
+	churn.global = make([]editInfo, 0)
+	churn.people = make(map[int][]editInfo)
+}
+
+// Consume processes a single commit. deps must contain every entity listed by Requires(). For
+// each changed file, one editInfo with the commit's day and the added and removed amounts is
+// appended to the global statistics and, if TrackPeople is set, to the statistics of the commit's
+// author. Inserted and deleted files are measured with hercules.CountLines(); modified files are
+// measured by summing the rune lengths of the inserted and deleted diff chunks (presumably each
+// rune of a "file_diff" chunk stands for one line - confirm against the provider of "file_diff").
+// The first return value is always nil because this item provides nothing to other items.
+func (churn *ChurnAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
+	fileDiffs := deps["file_diff"].(map[string]hercules.FileDiffData)
+	treeDiffs := deps["changes"].(object.Changes)
+	cache := deps["blob_cache"].(map[plumbing.Hash]*object.Blob)
+	day := deps["day"].(int)
+	author := deps["author"].(int)
+	// countLines measures the cached blob with the given hash. Binary blobs are not an error for
+	// this analysis: they contribute zero lines. The count which accompanies a non-nil error is
+	// never used, since it carries no meaning.
+	countLines := func(hash plumbing.Hash) (int, error) {
+		lines, err := hercules.CountLines(cache[hash])
+		if err != nil {
+			if err.Error() == "binary" {
+				return 0, nil
+			}
+			return 0, err
+		}
+		return lines, nil
+	}
+	for _, change := range treeDiffs {
+		action, err := change.Action()
+		if err != nil {
+			return nil, err
+		}
+		added, removed := 0, 0
+		switch action {
+		case merkletrie.Insert:
+			added, err = countLines(change.To.TreeEntry.Hash)
+		case merkletrie.Delete:
+			removed, err = countLines(change.From.TreeEntry.Hash)
+		case merkletrie.Modify:
+			// A file without an entry in fileDiffs yields the zero value and thus no chunks.
+			for _, edit := range fileDiffs[change.To.Name].Diffs {
+				length := utf8.RuneCountInString(edit.Text)
+				switch edit.Type {
+				case diffmatchpatch.DiffInsert:
+					added += length
+				case diffmatchpatch.DiffDelete:
+					removed += length
+				}
+			}
+		}
+		if err != nil {
+			return nil, err
+		}
+		ei := editInfo{Day: day, Added: added, Removed: removed}
+		churn.global = append(churn.global, ei)
+		if churn.TrackPeople {
+			// append() works on the nil slice returned for an author seen for the first time.
+			churn.people[author] = append(churn.people[author], ei)
+		}
+	}
+	return nil, nil
+}
+
+// Finalize aggregates the accumulated records by day and returns them as a ChurnAnalysisResult.
+// The per-author statistics are filled only if TrackPeople is set; author indices are translated
+// to names through reversedPeopleDict.
+func (churn *ChurnAnalysis) Finalize() interface{} {
+	global := editInfosToEdits(churn.global)
+	people := map[string]Edits{}
+	if churn.TrackPeople {
+		for author, infos := range churn.people {
+			name := churn.reversedPeopleDict[author]
+			people[name] = editInfosToEdits(infos)
+		}
+	}
+	return ChurnAnalysisResult{Global: global, People: people}
+}
+
+// Serialize writes the result produced by Finalize() to writer. result must be a
+// ChurnAnalysisResult. If binary is true, the protobuf encoding is used and its error is
+// returned; otherwise the text (YAML) form is written and nil is returned.
+func (churn *ChurnAnalysis) Serialize(result interface{}, binary bool, writer io.Writer) error {
+	churnResult := result.(ChurnAnalysisResult)
+	if !binary {
+		churn.serializeText(&churnResult, writer)
+		return nil
+	}
+	return churn.serializeBinary(&churnResult, writer)
+}
+
+// serializeText writes the result in the text (YAML) format: first the "global" section, then
+// one section per person. The people are written in the sorted order of their names so that the
+// output is the same on every run; Go randomizes the map iteration order.
+func (churn *ChurnAnalysis) serializeText(result *ChurnAnalysisResult, writer io.Writer) {
+	fmt.Fprintln(writer, "  global:")
+	printEdits(result.Global, writer, 4)
+	names := make([]string, 0, len(result.People))
+	for name := range result.People {
+		names = append(names, name)
+	}
+	sort.Strings(names)
+	for _, name := range names {
+		fmt.Fprintf(writer, "  %s:\n", yaml.SafeString(name))
+		printEdits(result.People[name], writer, 4)
+	}
+}
+
+// serializeBinary converts the result to a ChurnAnalysisResultMessage, marshals it with protobuf
+// and writes the bytes to writer. It returns the marshalling error or the write error, if any.
+func (churn *ChurnAnalysis) serializeBinary(result *ChurnAnalysisResult, writer io.Writer) error {
+	message := ChurnAnalysisResultMessage{
+		Global: editsToEditsMessage(result.Global),
+		People: map[string]*EditsMessage{},
+	}
+	for key, val := range result.People {
+		message.People[key] = editsToEditsMessage(val)
+	}
+	serialized, err := proto.Marshal(&message)
+	if err != nil {
+		return err
+	}
+	// A failed write must be reported; otherwise truncated output would pass for a success.
+	_, err = writer.Write(serialized)
+	return err
+}
+
+// editInfosToEdits sums the additions and the removals of the records which share the same day
+// and returns the totals as three parallel slices ordered by ascending day.
+func editInfosToEdits(eis []editInfo) Edits {
+	perDay := map[int]editInfo{}
+	for _, ei := range eis {
+		total := perDay[ei.Day]
+		total.Day = ei.Day
+		total.Added += ei.Added
+		total.Removed += ei.Removed
+		perDay[ei.Day] = total
+	}
+	days := make([]int, 0, len(perDay))
+	for day := range perDay {
+		days = append(days, day)
+	}
+	sort.Ints(days)
+	edits := Edits{
+		Days:      days,
+		Additions: make([]int, len(days)),
+		Removals:  make([]int, len(days)),
+	}
+	for i, day := range days {
+		total := perDay[day]
+		edits.Additions[i] = total.Added
+		edits.Removals[i] = total.Removed
+	}
+	return edits
+}
+
+// printEdits writes the three series of edits to writer as the inline lists "days", "additions"
+// and "removals", one per line, each line prefixed with indent spaces. An empty series is written
+// as "[]", so every line is terminated properly regardless of the amount of data.
+func printEdits(edits Edits, writer io.Writer, indent int) {
+	strIndent := strings.Repeat(" ", indent)
+	printArray := func(arr []int, name string) {
+		fmt.Fprintf(writer, "%s%s: [", strIndent, name)
+		for i, v := range arr {
+			if i > 0 {
+				fmt.Fprint(writer, ", ")
+			}
+			fmt.Fprintf(writer, "%d", v)
+		}
+		// The closing bracket is written outside the loop so that it is present even when arr
+		// has no elements.
+		fmt.Fprintln(writer, "]")
+	}
+	printArray(edits.Days, "days")
+	printArray(edits.Additions, "additions")
+	printArray(edits.Removals, "removals")
+}
+
+// editsToEditsMessage converts edits to the protobuf message by casting every element of the
+// three series from int to uint32.
+func editsToEditsMessage(edits Edits) *EditsMessage {
+	toUint32 := func(values []int) []uint32 {
+		converted := make([]uint32, len(values))
+		for i := range values {
+			converted[i] = uint32(values[i])
+		}
+		return converted
+	}
+	return &EditsMessage{
+		Days:      toUint32(edits.Days),
+		Additions: toUint32(edits.Additions),
+		Removals:  toUint32(edits.Removals),
+	}
+}
+
+// init registers the analysis in the Hercules registry when the plugin is loaded.
+func init() {
+	hercules.Registry.Register(new(ChurnAnalysis))
+}

+ 14 - 0
contrib/_plugin_example/churn_analysis.proto

@@ -0,0 +1,14 @@
+syntax = "proto3";
+option go_package = "main";
+
+// EditsMessage is the churn time series: element i of additions and removals belongs to the day
+// stored in element i of days.
+message EditsMessage {
+    // all three are of the same length
+    repeated uint32 days = 1;
+    repeated uint32 additions = 2;
+    repeated uint32 removals = 3;
+}
+
+// ChurnAnalysisResultMessage is the binary form of the churn analysis result.
+message ChurnAnalysisResultMessage {
+  // statistics aggregated over all the authors
+  EditsMessage global = 1;
+  // statistics of each author, keyed by the author's name
+  map<string, EditsMessage> people = 2;
+}

+ 81 - 0
contrib/_plugin_example/plot_churn.py

@@ -0,0 +1,81 @@
+import argparse
+from datetime import datetime, timedelta
+import os
+import re
+import sys
+import yaml
+
+from matplotlib import pyplot
+import matplotlib.dates as mdates
+import pandas
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("input", help="Path to the input YAML file. \"-\" means stdin.")
+    parser.add_argument("-o", "--output", help="Output directory. If empty, display the plots.")
+    parser.add_argument("-f", "--format", choices=("png", "svg"), default="png",
+                        help="Output format")
+    parser.add_argument("--tick-days", type=int, default=7, help="Ticks interval in days.")
+    args = parser.parse_args()
+    return args
+
+
+def parse_input(file):
+    yaml.reader.Reader.NON_PRINTABLE = re.compile(r"(?!x)x")
+    try:
+        loader = yaml.CLoader
+    except AttributeError:
+        print("Warning: failed to import yaml.CLoader, falling back to slow yaml.Loader")
+        loader = yaml.Loader
+    try:
+        if file != "-":
+            with open(file) as fin:
+                return yaml.load(fin, Loader=loader)
+        else:
+            return yaml.load(sys.stdin, Loader=loader)
+    except (UnicodeEncodeError, yaml.reader.ReaderError) as e:
+        print("\nInvalid unicode in the input: %s\nPlease filter it through "
+              "fix_yaml_unicode.py" % e)
+        sys.exit(1)
+
+
+def plot_churn(name, data, url, beginTime, endTime, output, fmt, tick_interval):
+    days, adds, dels = data["days"], data["additions"], data["removals"]
+    dates = [beginTime + timedelta(days=d) for d in days]
+    df = pandas.DataFrame(data=list(zip(adds, dels)),
+                          index=dates,
+                          columns=("additions", "removals"))
+    df["removals"] = -df["removals"]
+    df = df.reindex(pandas.date_range(beginTime, endTime, freq="D"))
+    pyplot.figure(figsize=(16, 9))
+    for spine in pyplot.gca().spines.values():
+        spine.set_visible(False)
+    pyplot.gca().xaxis.set_major_locator(mdates.DayLocator(interval=tick_interval))
+    pyplot.gca().xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d"))
+    pyplot.tick_params(top="off", bottom="off", left="off", right="off", labelleft="off", labelbottom="on")
+    pyplot.bar(df.index, df["additions"], label="additions")
+    pyplot.bar(df.index, df["removals"], label="removals")
+    pyplot.xticks(rotation="vertical")
+    pyplot.legend(loc=1)
+    pyplot.title("%s churn plot, %s" % (name, url), fontsize=24)
+    if not output:
+        pyplot.show()
+    else:
+        os.makedirs(output, exist_ok=True)
+        pyplot.savefig(os.path.join(output, name.replace("/", "_") + "." + fmt),
+                       bbox_inches="tight", transparent=True)
+
+
+def main():
+    args = parse_args()
+    data = parse_input(args.input)
+    beginTime, endTime = (datetime.fromtimestamp(data["hercules"][t])
+                          for t in ("begin_unix_time", "end_unix_time"))
+    for key, val in data["ChurnAnalysis"].items():
+        plot_churn(key, val, data["hercules"]["repository"], beginTime, endTime,
+                   args.output, args.format, args.tick_days)
+
+
+if __name__ == "__main__":
+    sys.exit(main())

BIN
doc/churn_global.png