123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219 |
- package hercules
- import (
- "bufio"
- "os"
- "sort"
- "strings"
- "gopkg.in/src-d/go-git.v4"
- "gopkg.in/src-d/go-git.v4/plumbing/object"
- )
// IdentityDetector resolves commit authors to integer developer ids.
// A single developer may be known under several emails and names.
type IdentityDetector struct {
	// PeopleDict maps an email or a name to the developer id.
	PeopleDict map[string]int
	// ReversedPeopleDict maps a developer id (used as the slice index)
	// to the description of that developer.
	ReversedPeopleDict []string
}
const (
	// MISSING_AUTHOR is the author id which Consume reports when neither the
	// email nor the name of a commit author is present in PeopleDict.
	MISSING_AUTHOR = 1<<18 - 1
	// SELF_AUTHOR is the reserved id right below MISSING_AUTHOR.
	// NOTE(review): its users are not visible in this file - confirm the
	// intended meaning against the callers.
	SELF_AUTHOR = 1<<18 - 2
	// UNMATCHED_AUTHOR is the description which LoadPeopleDict appends to
	// ReversedPeopleDict after the developers read from the file.
	UNMATCHED_AUTHOR = "<unmatched>"

	// FactIdentityDetectorPeopleDict is the facts key under which Configure
	// reads and publishes IdentityDetector.PeopleDict.
	FactIdentityDetectorPeopleDict = "IdentityDetector.PeopleDict"
	// FactIdentityDetectorReversedPeopleDict is the facts key under which
	// Configure reads and publishes IdentityDetector.ReversedPeopleDict.
	FactIdentityDetectorReversedPeopleDict = "IdentityDetector.ReversedPeopleDict"
	// ConfigIdentityDetectorPeopleDictPath is the facts key of the path to
	// the file which LoadPeopleDict parses.
	ConfigIdentityDetectorPeopleDictPath = "IdentityDetector.PeopleDictPath"
	// FactIdentityDetectorPeopleCount is the facts key under which Configure
	// publishes the number of developers.
	FactIdentityDetectorPeopleCount = "IdentityDetector.PeopleCount"
)
- func (id *IdentityDetector) Name() string {
- return "IdentityDetector"
- }
- func (id *IdentityDetector) Provides() []string {
- arr := [...]string{"author"}
- return arr[:]
- }
- func (id *IdentityDetector) Requires() []string {
- return []string{}
- }
- func (id *IdentityDetector) ListConfigurationOptions() []ConfigurationOption {
- options := [...]ConfigurationOption{{
- Name: ConfigIdentityDetectorPeopleDictPath,
- Description: "Path to the developers' email associations.",
- Flag: "people-dict",
- Type: StringConfigurationOption,
- Default: ""},
- }
- return options[:]
- }
- func (id *IdentityDetector) Configure(facts map[string]interface{}) {
- if val, exists := facts[FactIdentityDetectorPeopleDict].(map[string]int); exists {
- id.PeopleDict = val
- }
- if val, exists := facts[FactIdentityDetectorReversedPeopleDict].([]string); exists {
- id.ReversedPeopleDict = val
- }
- if id.PeopleDict == nil || id.ReversedPeopleDict == nil {
- peopleDictPath, _ := facts[ConfigIdentityDetectorPeopleDictPath].(string)
- if peopleDictPath != "" {
- id.LoadPeopleDict(peopleDictPath)
- facts[FactIdentityDetectorPeopleCount] = len(id.ReversedPeopleDict) - 1
- } else {
- if _, exists := facts[FactPipelineCommits]; !exists {
- panic("IdentityDetector needs a list of commits to initialize.")
- }
- id.GeneratePeopleDict(facts[FactPipelineCommits].([]*object.Commit))
- facts[FactIdentityDetectorPeopleCount] = len(id.ReversedPeopleDict)
- }
- } else {
- facts[FactIdentityDetectorPeopleCount] = len(id.ReversedPeopleDict)
- }
- facts[FactIdentityDetectorPeopleDict] = id.PeopleDict
- facts[FactIdentityDetectorReversedPeopleDict] = id.ReversedPeopleDict
- }
- func (id *IdentityDetector) Initialize(repository *git.Repository) {
- }
- func (self *IdentityDetector) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
- commit := deps["commit"].(*object.Commit)
- signature := commit.Author
- id, exists := self.PeopleDict[strings.ToLower(signature.Email)]
- if !exists {
- id, exists = self.PeopleDict[strings.ToLower(signature.Name)]
- if !exists {
- id = MISSING_AUTHOR
- }
- }
- return map[string]interface{}{"author": id}, nil
- }
- func (id *IdentityDetector) LoadPeopleDict(path string) error {
- file, err := os.Open(path)
- if err != nil {
- return err
- }
- defer file.Close()
- scanner := bufio.NewScanner(file)
- dict := make(map[string]int)
- reverse_dict := []string{}
- size := 0
- for scanner.Scan() {
- ids := strings.Split(scanner.Text(), "|")
- for _, id := range ids {
- dict[strings.ToLower(id)] = size
- }
- reverse_dict = append(reverse_dict, ids[0])
- size += 1
- }
- reverse_dict = append(reverse_dict, UNMATCHED_AUTHOR)
- id.PeopleDict = dict
- id.ReversedPeopleDict = reverse_dict
- return nil
- }
- func (id *IdentityDetector) GeneratePeopleDict(commits []*object.Commit) {
- dict := map[string]int{}
- emails := map[int][]string{}
- names := map[int][]string{}
- size := 0
- mailmapFile, err := commits[len(commits)-1].File(".mailmap")
- if err == nil {
- mailMapContents, err := mailmapFile.Contents()
- if err == nil {
- mailmap := ParseMailmap(mailMapContents)
- for key, val := range mailmap {
- key = strings.ToLower(key)
- toEmail := strings.ToLower(val.Email)
- toName := strings.ToLower(val.Name)
- id, exists := dict[toEmail]
- if !exists {
- id, exists = dict[toName]
- }
- if exists {
- dict[key] = id
- } else {
- id = size
- size++
- if toEmail != "" {
- dict[toEmail] = id
- emails[id] = append(emails[id], toEmail)
- }
- if toName != "" {
- dict[toName] = id
- names[id] = append(names[id], toName)
- }
- dict[key] = id
- }
- if strings.Contains(key, "@") {
- exists := false
- for _, val := range emails[id] {
- if key == val {
- exists = true
- break
- }
- }
- if !exists {
- emails[id] = append(emails[id], key)
- }
- } else {
- exists := false
- for _, val := range names[id] {
- if key == val {
- exists = true
- break
- }
- }
- if !exists {
- names[id] = append(names[id], key)
- }
- }
- }
- }
- }
- for _, commit := range commits {
- email := strings.ToLower(commit.Author.Email)
- name := strings.ToLower(commit.Author.Name)
- id, exists := dict[email]
- if exists {
- _, exists := dict[name]
- if !exists {
- dict[name] = id
- names[id] = append(names[id], name)
- }
- continue
- }
- id, exists = dict[name]
- if exists {
- dict[email] = id
- emails[id] = append(emails[id], email)
- continue
- }
- dict[email] = size
- dict[name] = size
- emails[size] = append(emails[size], email)
- names[size] = append(names[size], name)
- size += 1
- }
- reverse_dict := make([]string, size)
- for _, val := range dict {
- sort.Strings(names[val])
- sort.Strings(emails[val])
- reverse_dict[val] = strings.Join(names[val], "|") + "|" + strings.Join(emails[val], "|")
- }
- id.PeopleDict = dict
- id.ReversedPeopleDict = reverse_dict
- }
- func init() {
- Registry.Register(&IdentityDetector{})
- }
|