|
@@ -29,9 +29,11 @@ package main
|
|
|
|
|
|
import (
|
|
import (
|
|
"bytes"
|
|
"bytes"
|
|
|
|
+ "context"
|
|
"flag"
|
|
"flag"
|
|
"fmt"
|
|
"fmt"
|
|
"io"
|
|
"io"
|
|
|
|
+ "io/ioutil"
|
|
"net/http"
|
|
"net/http"
|
|
_ "net/http/pprof"
|
|
_ "net/http/pprof"
|
|
"os"
|
|
"os"
|
|
@@ -49,6 +51,7 @@ import (
|
|
"gopkg.in/src-d/hercules.v2/stdout"
|
|
"gopkg.in/src-d/hercules.v2/stdout"
|
|
"gopkg.in/src-d/hercules.v2/pb"
|
|
"gopkg.in/src-d/hercules.v2/pb"
|
|
"github.com/gogo/protobuf/proto"
|
|
"github.com/gogo/protobuf/proto"
|
|
|
|
+ "time"
|
|
)
|
|
)
|
|
|
|
|
|
func sortedKeys(m map[string][][]int64) []string {
|
|
func sortedKeys(m map[string][][]int64) []string {
|
|
@@ -82,9 +85,13 @@ func main() {
|
|
var withFiles bool
|
|
var withFiles bool
|
|
var withPeople bool
|
|
var withPeople bool
|
|
var withCouples bool
|
|
var withCouples bool
|
|
|
|
+ var withUasts bool
|
|
var people_dict_path string
|
|
var people_dict_path string
|
|
var profile bool
|
|
var profile bool
|
|
- var granularity, sampling, similarity_threshold int
|
|
|
|
|
|
+ var granularity, sampling, similarityThreshold int
|
|
|
|
+ var bblfshEndpoint string
|
|
|
|
+ var bblfshTimeout int
|
|
|
|
+ var uastPoolSize int
|
|
var commitsFile string
|
|
var commitsFile string
|
|
var ignoreMissingSubmodules bool
|
|
var ignoreMissingSubmodules bool
|
|
var debug bool
|
|
var debug bool
|
|
@@ -92,11 +99,13 @@ func main() {
|
|
flag.BoolVar(&withPeople, "people", false, "Output detailed statistics per each developer.")
|
|
flag.BoolVar(&withPeople, "people", false, "Output detailed statistics per each developer.")
|
|
flag.BoolVar(&withCouples, "couples", false, "Gather the co-occurrence matrix "+
|
|
flag.BoolVar(&withCouples, "couples", false, "Gather the co-occurrence matrix "+
|
|
"for files and people.")
|
|
"for files and people.")
|
|
|
|
+ flag.BoolVar(&withUasts, "uasts", false, "Output pairs of Universal Abstract Syntax Trees for " +
|
|
|
|
+ "every changed file in each commit.")
|
|
flag.StringVar(&people_dict_path, "people-dict", "", "Path to the developers' email associations.")
|
|
flag.StringVar(&people_dict_path, "people-dict", "", "Path to the developers' email associations.")
|
|
flag.BoolVar(&profile, "profile", false, "Collect the profile to hercules.pprof.")
|
|
flag.BoolVar(&profile, "profile", false, "Collect the profile to hercules.pprof.")
|
|
flag.IntVar(&granularity, "granularity", 30, "How many days there are in a single band.")
|
|
flag.IntVar(&granularity, "granularity", 30, "How many days there are in a single band.")
|
|
flag.IntVar(&sampling, "sampling", 30, "How frequently to record the state in days.")
|
|
flag.IntVar(&sampling, "sampling", 30, "How frequently to record the state in days.")
|
|
- flag.IntVar(&similarity_threshold, "M", 90,
|
|
|
|
|
|
+ flag.IntVar(&similarityThreshold, "M", 90,
|
|
"A threshold on the similarity index used to detect renames.")
|
|
"A threshold on the similarity index used to detect renames.")
|
|
flag.BoolVar(&debug, "debug", false, "Validate the trees on each step.")
|
|
flag.BoolVar(&debug, "debug", false, "Validate the trees on each step.")
|
|
flag.StringVar(&commitsFile, "commits", "", "Path to the text file with the "+
|
|
flag.StringVar(&commitsFile, "commits", "", "Path to the text file with the "+
|
|
@@ -106,6 +115,9 @@ func main() {
|
|
flag.BoolVar(&ignoreMissingSubmodules, "ignore-missing-submodules", false,
|
|
flag.BoolVar(&ignoreMissingSubmodules, "ignore-missing-submodules", false,
|
|
"Do not panic on submodules which are not registered..")
|
|
"Do not panic on submodules which are not registered..")
|
|
flag.BoolVar(&protobuf, "pb", false, "The output format will be Protocol Buffers instead of YAML.")
|
|
flag.BoolVar(&protobuf, "pb", false, "The output format will be Protocol Buffers instead of YAML.")
|
|
|
|
+ flag.IntVar(&uastPoolSize, "uast-pool-size", 1, "Number of goroutines to extract UASTs.")
|
|
|
|
+ flag.StringVar(&bblfshEndpoint, "bblfsh", "0.0.0.0:9432", "Babelfish server's endpoint.")
|
|
|
|
+ flag.IntVar(&bblfshTimeout, "bblfsh-timeout", 20, "Babelfish's server timeout.")
|
|
flag.Parse()
|
|
flag.Parse()
|
|
if granularity <= 0 {
|
|
if granularity <= 0 {
|
|
fmt.Fprint(os.Stderr, "Warning: adjusted the granularity to 1 day\n")
|
|
fmt.Fprint(os.Stderr, "Warning: adjusted the granularity to 1 day\n")
|
|
@@ -180,22 +192,37 @@ func main() {
|
|
pipeline.AddItem(&hercules.BlobCache{
|
|
pipeline.AddItem(&hercules.BlobCache{
|
|
IgnoreMissingSubmodules: ignoreMissingSubmodules,
|
|
IgnoreMissingSubmodules: ignoreMissingSubmodules,
|
|
})
|
|
})
|
|
|
|
+ var uastSaver *hercules.UASTChangesSaver
|
|
|
|
+ if withUasts {
|
|
|
|
+ pipeline.AddItem(&hercules.UASTExtractor{
|
|
|
|
+ Endpoint: bblfshEndpoint,
|
|
|
|
+ Context: func() context.Context {
|
|
|
|
+ ctx, _ := context.WithTimeout(context.Background(),
|
|
|
|
+ time.Duration(bblfshTimeout) * time.Second)
|
|
|
|
+ return ctx
|
|
|
|
+ },
|
|
|
|
+ PoolSize: uastPoolSize,
|
|
|
|
+ Extensions: map[string]bool {"py": true, "java": true}})
|
|
|
|
+ pipeline.AddItem(&hercules.UASTChanges{})
|
|
|
|
+ uastSaver = &hercules.UASTChangesSaver{}
|
|
|
|
+ pipeline.AddItem(uastSaver)
|
|
|
|
+ }
|
|
pipeline.AddItem(&hercules.DaysSinceStart{})
|
|
pipeline.AddItem(&hercules.DaysSinceStart{})
|
|
- pipeline.AddItem(&hercules.RenameAnalysis{SimilarityThreshold: similarity_threshold})
|
|
|
|
|
|
+ pipeline.AddItem(&hercules.RenameAnalysis{SimilarityThreshold: similarityThreshold})
|
|
pipeline.AddItem(&hercules.TreeDiff{})
|
|
pipeline.AddItem(&hercules.TreeDiff{})
|
|
pipeline.AddItem(&hercules.FileDiff{})
|
|
pipeline.AddItem(&hercules.FileDiff{})
|
|
- id_matcher := &hercules.IdentityDetector{}
|
|
|
|
|
|
+ idMatcher := &hercules.IdentityDetector{}
|
|
var peopleCount int
|
|
var peopleCount int
|
|
if withPeople || withCouples {
|
|
if withPeople || withCouples {
|
|
if people_dict_path != "" {
|
|
if people_dict_path != "" {
|
|
- id_matcher.LoadPeopleDict(people_dict_path)
|
|
|
|
- peopleCount = len(id_matcher.ReversePeopleDict) - 1
|
|
|
|
|
|
+ idMatcher.LoadPeopleDict(people_dict_path)
|
|
|
|
+ peopleCount = len(idMatcher.ReversePeopleDict) - 1
|
|
} else {
|
|
} else {
|
|
- id_matcher.GeneratePeopleDict(commits)
|
|
|
|
- peopleCount = len(id_matcher.ReversePeopleDict)
|
|
|
|
|
|
+ idMatcher.GeneratePeopleDict(commits)
|
|
|
|
+ peopleCount = len(idMatcher.ReversePeopleDict)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
- pipeline.AddItem(id_matcher)
|
|
|
|
|
|
+ pipeline.AddItem(idMatcher)
|
|
burndowner := &hercules.BurndownAnalysis{
|
|
burndowner := &hercules.BurndownAnalysis{
|
|
Granularity: granularity,
|
|
Granularity: granularity,
|
|
Sampling: sampling,
|
|
Sampling: sampling,
|
|
@@ -217,23 +244,47 @@ func main() {
|
|
}
|
|
}
|
|
fmt.Fprint(os.Stderr, "writing... \r")
|
|
fmt.Fprint(os.Stderr, "writing... \r")
|
|
burndownResults := result[burndowner].(hercules.BurndownResult)
|
|
burndownResults := result[burndowner].(hercules.BurndownResult)
|
|
|
|
+ if len(burndownResults.GlobalHistory) == 0 {
|
|
|
|
+ return
|
|
|
|
+ }
|
|
var couplesResult hercules.CouplesResult
|
|
var couplesResult hercules.CouplesResult
|
|
if withCouples {
|
|
if withCouples {
|
|
couplesResult = result[coupler].(hercules.CouplesResult)
|
|
couplesResult = result[coupler].(hercules.CouplesResult)
|
|
}
|
|
}
|
|
- if len(burndownResults.GlobalHistory) == 0 {
|
|
|
|
- return
|
|
|
|
|
|
+ if withUasts {
|
|
|
|
+ changedUasts := result[uastSaver].([][]hercules.UASTChange)
|
|
|
|
+ for i, changes := range changedUasts {
|
|
|
|
+ for j, change := range changes {
|
|
|
|
+ if change.Before == nil || change.After == nil {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ bs, _ := change.Before.Marshal()
|
|
|
|
+ ioutil.WriteFile(fmt.Sprintf(
|
|
|
|
+ "%d_%d_before_%s.pb", i, j, change.Change.From.TreeEntry.Hash.String()), bs, 0666)
|
|
|
|
+ blob, _ := repository.BlobObject(change.Change.From.TreeEntry.Hash)
|
|
|
|
+ s, _ := (&object.File{Blob: *blob}).Contents()
|
|
|
|
+ ioutil.WriteFile(fmt.Sprintf(
|
|
|
|
+ "%d_%d_before_%s.src", i, j, change.Change.From.TreeEntry.Hash.String()), []byte(s), 0666)
|
|
|
|
+ bs, _ = change.After.Marshal()
|
|
|
|
+ ioutil.WriteFile(fmt.Sprintf(
|
|
|
|
+ "%d_%d_after_%s.pb", i, j, change.Change.To.TreeEntry.Hash.String()), bs, 0666)
|
|
|
|
+ blob, _ = repository.BlobObject(change.Change.To.TreeEntry.Hash)
|
|
|
|
+ s, _ = (&object.File{Blob: *blob}).Contents()
|
|
|
|
+ ioutil.WriteFile(fmt.Sprintf(
|
|
|
|
+ "%d_%d_after_%s.src", i, j, change.Change.To.TreeEntry.Hash.String()), []byte(s), 0666)
|
|
|
|
+ }
|
|
|
|
+ }
|
|
}
|
|
}
|
|
begin := commits[0].Author.When.Unix()
|
|
begin := commits[0].Author.When.Unix()
|
|
end := commits[len(commits)-1].Author.When.Unix()
|
|
end := commits[len(commits)-1].Author.When.Unix()
|
|
if !protobuf {
|
|
if !protobuf {
|
|
printResults(uri, begin, end, granularity, sampling,
|
|
printResults(uri, begin, end, granularity, sampling,
|
|
withFiles, withPeople, withCouples,
|
|
withFiles, withPeople, withCouples,
|
|
- burndownResults, couplesResult, id_matcher.ReversePeopleDict)
|
|
|
|
|
|
+ burndownResults, couplesResult, idMatcher.ReversePeopleDict)
|
|
} else {
|
|
} else {
|
|
serializeResults(uri, begin, end, granularity, sampling,
|
|
serializeResults(uri, begin, end, granularity, sampling,
|
|
withFiles, withPeople, withCouples,
|
|
withFiles, withPeople, withCouples,
|
|
- burndownResults, couplesResult, id_matcher.ReversePeopleDict)
|
|
|
|
|
|
+ burndownResults, couplesResult, idMatcher.ReversePeopleDict)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|