| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323 | package herculesimport (	"fmt"	"io"	"sort"	"github.com/gogo/protobuf/proto"	"gopkg.in/src-d/go-git.v4"	"gopkg.in/src-d/go-git.v4/plumbing/object"	"gopkg.in/src-d/go-git.v4/utils/merkletrie"	"gopkg.in/src-d/hercules.v3/pb"	"gopkg.in/src-d/hercules.v3/yaml")type CouplesAnalysis struct {	// The number of developers for which to build the matrix. 0 disables this analysis.	PeopleNumber int	// people store how many times every developer committed to every file.	people []map[string]int	// people_commits is the number of commits each author made	people_commits []int	// files store every file occurred in the same commit with every other file.	files map[string]map[string]int	// references IdentityDetector.ReversedPeopleDict	reversedPeopleDict []string}type CouplesResult struct {	PeopleMatrix []map[int]int64	PeopleFiles  [][]int	FilesMatrix  []map[int]int64	Files        []string}func (couples *CouplesAnalysis) Name() string {	return "Couples"}func (couples *CouplesAnalysis) Provides() []string {	return []string{}}func (couples *CouplesAnalysis) Requires() []string {	arr := [...]string{"author", "changes"}	return arr[:]}func (couples *CouplesAnalysis) ListConfigurationOptions() []ConfigurationOption {	return []ConfigurationOption{}}func (couples *CouplesAnalysis) Configure(facts map[string]interface{}) {	if val, exists := facts[FactIdentityDetectorPeopleCount].(int); exists {		couples.PeopleNumber = val		couples.reversedPeopleDict = facts[FactIdentityDetectorReversedPeopleDict].([]string)	}}func (couples *CouplesAnalysis) Flag() string {	return "couples"}func (couples *CouplesAnalysis) Initialize(repository *git.Repository) {	couples.people = make([]map[string]int, couples.PeopleNumber+1)	for i := range couples.people {		couples.people[i] = map[string]int{}	}	couples.people_commits = make([]int, couples.PeopleNumber+1)	couples.files = map[string]map[string]int{}}func (couples *CouplesAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {	author := deps["author"].(int)	if author == MISSING_AUTHOR {		author = couples.PeopleNumber	}	couples.people_commits[author] += 1	tree_diff := deps["changes"].(object.Changes)	context := make([]string, 0)	deleteFile := func(name string) {		// we do not remove the file from people - the context does not expire		delete(couples.files, name)		for _, otherFiles := range couples.files {			delete(otherFiles, name)		}	}	for _, change := range tree_diff {		action, err := change.Action()		if err != nil {			return nil, err		}		toName := change.To.Name		fromName := change.From.Name		switch action {		case merkletrie.Insert:			context = append(context, toName)			couples.people[author][toName] += 1		case merkletrie.Delete:			deleteFile(fromName)			couples.people[author][fromName] += 1		case merkletrie.Modify:			if fromName != toName {				// renamed				couples.files[toName] = couples.files[fromName]				for _, otherFiles := range couples.files {					val, exists := otherFiles[fromName]					if exists {						otherFiles[toName] = val					}				}				deleteFile(fromName)				for _, authorFiles := range couples.people {					val, exists := authorFiles[fromName]					if exists {						authorFiles[toName] = val						delete(authorFiles, fromName)					}				}			}			context = append(context, toName)			couples.people[author][toName] += 1		}	}	for _, file := range context {		for _, otherFile := range context {			lane, exists := couples.files[file]			if !exists {				lane = map[string]int{}				couples.files[file] = lane			}			lane[otherFile] += 1		}	}	return nil, nil}func (couples *CouplesAnalysis) Finalize() interface{} {	filesSequence := make([]string, len(couples.files))	i := 0	for file := range couples.files {		filesSequence[i] = file		i++	}	sort.Strings(filesSequence)	filesIndex := map[string]int{}	for i, file := range filesSequence {		filesIndex[file] = i	}	peopleMatrix := make([]map[int]int64, couples.PeopleNumber+1)	peopleFiles := make([][]int, couples.PeopleNumber+1)	for i := range peopleMatrix {		peopleMatrix[i] = map[int]int64{}		for file, commits := range couples.people[i] {			fi, exists := filesIndex[file]			if exists {				peopleFiles[i] = append(peopleFiles[i], fi)			}			for j, otherFiles := range couples.people {				otherCommits := otherFiles[file]				delta := otherCommits				if otherCommits > commits {					delta = commits				}				if delta > 0 {					peopleMatrix[i][j] += int64(delta)				}			}		}		sort.Ints(peopleFiles[i])	}	filesMatrix := make([]map[int]int64, len(filesIndex))	for i := range filesMatrix {		filesMatrix[i] = map[int]int64{}		for otherFile, cooccs := range couples.files[filesSequence[i]] {			filesMatrix[i][filesIndex[otherFile]] = int64(cooccs)		}	}	return CouplesResult{		PeopleMatrix: peopleMatrix, PeopleFiles: peopleFiles,		Files: filesSequence, FilesMatrix: filesMatrix}}func (couples *CouplesAnalysis) Serialize(result interface{}, binary bool, writer io.Writer) error {	couplesResult := result.(CouplesResult)	if binary {		return couples.serializeBinary(&couplesResult, writer)	}	couples.serializeText(&couplesResult, writer)	return nil}func (couples *CouplesAnalysis) serializeText(result *CouplesResult, writer io.Writer) {	fmt.Fprintln(writer, "  files_coocc:")	fmt.Fprintln(writer, "    index:")	for _, file := range result.Files {		fmt.Fprintf(writer, "      - %s\n", yaml.SafeString(file))	}	fmt.Fprintln(writer, "    matrix:")	for _, files := range result.FilesMatrix {		fmt.Fprint(writer, "      - {")		indices := []int{}		for file := range files {			indices = append(indices, file)		}		sort.Ints(indices)		for i, file := range indices {			fmt.Fprintf(writer, "%d: %d", file, files[file])			if i < len(indices)-1 {				fmt.Fprint(writer, ", ")			}		}		fmt.Fprintln(writer, "}")	}	fmt.Fprintln(writer, "  people_coocc:")	fmt.Fprintln(writer, "    index:")	for _, person := range couples.reversedPeopleDict {		fmt.Fprintf(writer, "      - %s\n", yaml.SafeString(person))	}	fmt.Fprintln(writer, "    matrix:")	for _, people := range result.PeopleMatrix {		fmt.Fprint(writer, "      - {")		indices := []int{}		for file := range people {			indices = append(indices, file)		}		sort.Ints(indices)		for i, person := range indices {			fmt.Fprintf(writer, "%d: %d", person, people[person])			if i < len(indices)-1 {				fmt.Fprint(writer, ", ")			}		}		fmt.Fprintln(writer, "}")	}	fmt.Fprintln(writer, "    author_files:") // sorted by number of files each author changed	peopleFiles := sortByNumberOfFiles(result.PeopleFiles, couples.reversedPeopleDict, result.Files)	for _, authorFiles := range peopleFiles {		fmt.Fprintf(writer, "      - %s:\n", yaml.SafeString(authorFiles.Author))		sort.Strings(authorFiles.Files)		for _, file := range authorFiles.Files {			fmt.Fprintf(writer, "        - %s\n", yaml.SafeString(file)) // sorted by path		}	}}func sortByNumberOfFiles(	peopleFiles [][]int, peopleDict []string, filesDict []string) authorFilesList {	var pfl authorFilesList	for peopleIdx, files := range peopleFiles {		if peopleIdx < len(peopleDict) {			fileNames := make([]string, len(files))			for i, fi := range files {				fileNames[i] = filesDict[fi]			}			pfl = append(pfl, authorFiles{peopleDict[peopleIdx], fileNames})		}	}	sort.Sort(pfl)	return pfl}type authorFiles struct {	Author string	Files  []string}type authorFilesList []authorFilesfunc (s authorFilesList) Len() int {	return len(s)}func (s authorFilesList) Swap(i, j int) {	s[i], s[j] = s[j], s[i]}func (s authorFilesList) Less(i, j int) bool {	return len(s[i].Files) < len(s[j].Files)}func (couples *CouplesAnalysis) serializeBinary(result *CouplesResult, writer io.Writer) error {	message := pb.CouplesAnalysisResults{}	message.FileCouples = &pb.Couples{		Index:  result.Files,		Matrix: pb.MapToCompressedSparseRowMatrix(result.FilesMatrix),	}	message.DeveloperCouples = &pb.Couples{		Index:  couples.reversedPeopleDict,		Matrix: pb.MapToCompressedSparseRowMatrix(result.PeopleMatrix),	}	message.TouchedFiles = &pb.DeveloperTouchedFiles{		Developers: make([]*pb.TouchedFiles, len(couples.reversedPeopleDict)),	}	for key := range couples.reversedPeopleDict {		files := result.PeopleFiles[key]		int32Files := make([]int32, len(files))		for i, f := range files {			int32Files[i] = int32(f)		}		message.TouchedFiles.Developers[key] = &pb.TouchedFiles{			Files: int32Files,		}	}	serialized, err := proto.Marshal(&message)	if err != nil {		return err	}	writer.Write(serialized)	return nil}func init() {	Registry.Register(&CouplesAnalysis{})}
 |