| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264 | package herculesimport (	"bufio"	"os"	"sort"	"strings"	"gopkg.in/src-d/go-git.v4"	"gopkg.in/src-d/go-git.v4/plumbing/object")// IdentityDetector determines the author of a commit. Same person can commit under different// signatures, and we apply some heuristics to merge those together.// It is a PipelineItem.type IdentityDetector struct {	// PeopleDict maps email || name  -> developer id.	PeopleDict map[string]int	// ReversedPeopleDict maps developer id -> description	ReversedPeopleDict []string}const (	// AuthorMissing is the internal author index which denotes any unmatched identities	// (IdentityDetector.Consume()).	AuthorMissing = (1 << 18) - 1	// AuthorMissingName is the string name which corresponds to AuthorMissing.	AuthorMissingName = "<unmatched>"	// FactIdentityDetectorPeopleDict is the name of the fact which is inserted in	// IdentityDetector.Configure(). It corresponds to IdentityDetector.PeopleDict - the mapping	// from the signatures to the author indices.	FactIdentityDetectorPeopleDict = "IdentityDetector.PeopleDict"	// FactIdentityDetectorReversedPeopleDict is the name of the fact which is inserted in	// IdentityDetector.Configure(). It corresponds to IdentityDetector.ReversedPeopleDict -	// the mapping from the author indices to the main signature.	FactIdentityDetectorReversedPeopleDict = "IdentityDetector.ReversedPeopleDict"	// ConfigIdentityDetectorPeopleDictPath is the name of the configuration option	// (IdentityDetector.Configure()) which allows to set the external PeopleDict mapping from a file.	ConfigIdentityDetectorPeopleDictPath = "IdentityDetector.PeopleDictPath"	// FactIdentityDetectorPeopleCount is the name of the fact which is inserted in	// IdentityDetector.Configure(). It is equal to the overall number of unique authors	// (the length of ReversedPeopleDict).	FactIdentityDetectorPeopleCount = "IdentityDetector.PeopleCount"	// DependencyAuthor is the name of the dependency provided by IdentityDetector.	DependencyAuthor = "author")func (id *IdentityDetector) Name() string {	return "IdentityDetector"}func (id *IdentityDetector) Provides() []string {	arr := [...]string{DependencyAuthor}	return arr[:]}func (id *IdentityDetector) Requires() []string {	return []string{}}func (id *IdentityDetector) ListConfigurationOptions() []ConfigurationOption {	options := [...]ConfigurationOption{{		Name:        ConfigIdentityDetectorPeopleDictPath,		Description: "Path to the developers' email associations.",		Flag:        "people-dict",		Type:        StringConfigurationOption,		Default:     ""},	}	return options[:]}func (id *IdentityDetector) Configure(facts map[string]interface{}) {	if val, exists := facts[FactIdentityDetectorPeopleDict].(map[string]int); exists {		id.PeopleDict = val	}	if val, exists := facts[FactIdentityDetectorReversedPeopleDict].([]string); exists {		id.ReversedPeopleDict = val	}	if id.PeopleDict == nil || id.ReversedPeopleDict == nil {		peopleDictPath, _ := facts[ConfigIdentityDetectorPeopleDictPath].(string)		if peopleDictPath != "" {			id.LoadPeopleDict(peopleDictPath)			facts[FactIdentityDetectorPeopleCount] = len(id.ReversedPeopleDict) - 1		} else {			if _, exists := facts[ConfigPipelineCommits]; !exists {				panic("IdentityDetector needs a list of commits to initialize.")			}			id.GeneratePeopleDict(facts[ConfigPipelineCommits].([]*object.Commit))			facts[FactIdentityDetectorPeopleCount] = len(id.ReversedPeopleDict)		}	} else {		facts[FactIdentityDetectorPeopleCount] = len(id.ReversedPeopleDict)	}	facts[FactIdentityDetectorPeopleDict] = id.PeopleDict	facts[FactIdentityDetectorReversedPeopleDict] = id.ReversedPeopleDict}func (id *IdentityDetector) Initialize(repository *git.Repository) {}func (id *IdentityDetector) Consume(deps map[string]interface{}) (map[string]interface{}, error) {	commit := deps["commit"].(*object.Commit)	signature := commit.Author	authorID, exists := id.PeopleDict[strings.ToLower(signature.Email)]	if !exists {		authorID, exists = id.PeopleDict[strings.ToLower(signature.Name)]		if !exists {			authorID = AuthorMissing		}	}	return map[string]interface{}{DependencyAuthor: authorID}, nil}func (id *IdentityDetector) LoadPeopleDict(path string) error {	file, err := os.Open(path)	if err != nil {		return err	}	defer file.Close()	scanner := bufio.NewScanner(file)	dict := make(map[string]int)	reverseDict := []string{}	size := 0	for scanner.Scan() {		ids := strings.Split(scanner.Text(), "|")		for _, id := range ids {			dict[strings.ToLower(id)] = size		}		reverseDict = append(reverseDict, ids[0])		size++	}	reverseDict = append(reverseDict, AuthorMissingName)	id.PeopleDict = dict	id.ReversedPeopleDict = reverseDict	return nil}func (id *IdentityDetector) GeneratePeopleDict(commits []*object.Commit) {	dict := map[string]int{}	emails := map[int][]string{}	names := map[int][]string{}	size := 0	mailmapFile, err := commits[len(commits)-1].File(".mailmap")	if err == nil {		mailMapContents, err := mailmapFile.Contents()		if err == nil {			mailmap := ParseMailmap(mailMapContents)			for key, val := range mailmap {				key = strings.ToLower(key)				toEmail := strings.ToLower(val.Email)				toName := strings.ToLower(val.Name)				id, exists := dict[toEmail]				if !exists {					id, exists = dict[toName]				}				if exists {					dict[key] = id				} else {					id = size					size++					if toEmail != "" {						dict[toEmail] = id						emails[id] = append(emails[id], toEmail)					}					if toName != "" {						dict[toName] = id						names[id] = append(names[id], toName)					}					dict[key] = id				}				if strings.Contains(key, "@") {					exists := false					for _, val := range emails[id] {						if key == val {							exists = true							break						}					}					if !exists {						emails[id] = append(emails[id], key)					}				} else {					exists := false					for _, val := range names[id] {						if key == val {							exists = true							break						}					}					if !exists {						names[id] = append(names[id], key)					}				}			}		}	}	for _, commit := range commits {		email := strings.ToLower(commit.Author.Email)		name := strings.ToLower(commit.Author.Name)		id, exists := dict[email]		if exists {			_, exists := dict[name]			if !exists {				dict[name] = id				names[id] = append(names[id], name)			}			continue		}		id, exists = dict[name]		if exists {			dict[email] = id			emails[id] = append(emails[id], email)			continue		}		dict[email] = size		dict[name] = size		emails[size] = append(emails[size], email)		names[size] = append(names[size], name)		size++	}	reverseDict := make([]string, size)	for _, val := range dict {		sort.Strings(names[val])		sort.Strings(emails[val])		reverseDict[val] = strings.Join(names[val], "|") + "|" + strings.Join(emails[val], "|")	}	id.PeopleDict = dict	id.ReversedPeopleDict = reverseDict}// MergeReversedDicts joins two identity lists together, excluding duplicates, in-order.func (id IdentityDetector) MergeReversedDicts(rd1, rd2 []string) (map[string][3]int, []string) {	people := map[string][3]int{}	for i, pid := range rd1 {		ptrs := people[pid]		ptrs[0] = len(people)		ptrs[1] = i		ptrs[2] = -1		people[pid] = ptrs	}	for i, pid := range rd2 {		ptrs, exists := people[pid]		if !exists {			ptrs[0] = len(people)			ptrs[1] = -1		}		ptrs[2] = i		people[pid] = ptrs	}	mrd := make([]string, len(people))	for name, ptrs := range people {		mrd[ptrs[0]] = name	}	return people, mrd}func init() {	Registry.Register(&IdentityDetector{})}
 |