couples.go 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323
  1. package hercules
  2. import (
  3. "fmt"
  4. "io"
  5. "sort"
  6. "github.com/gogo/protobuf/proto"
  7. "gopkg.in/src-d/go-git.v4"
  8. "gopkg.in/src-d/go-git.v4/plumbing/object"
  9. "gopkg.in/src-d/go-git.v4/utils/merkletrie"
  10. "gopkg.in/src-d/hercules.v3/pb"
  11. "gopkg.in/src-d/hercules.v3/yaml"
  12. )
  // CouplesAnalysis accumulates, commit by commit, how often files change
  // together and how often every developer touches every file.
  type CouplesAnalysis struct {
  	// PeopleNumber is the number of developers for which to build the matrix.
  	// 0 disables this analysis.
  	PeopleNumber int

  	// people holds, per developer index, the number of times that developer
  	// committed to each file (keyed by file name).
  	people []map[string]int
  	// people_commits holds the number of commits made by each developer index.
  	people_commits []int
  	// files maps a file name to the files that occurred in the same commit,
  	// together with the number of such co-occurrences.
  	files map[string]map[string]int
  	// reversedPeopleDict references IdentityDetector.ReversedPeopleDict.
  	reversedPeopleDict []string
  }
  // CouplesResult is the value produced by CouplesAnalysis.Finalize.
  type CouplesResult struct {
  	// PeopleMatrix[i][j] sums, over the files developer i committed to, the
  	// smaller of the two developers' commit counts for that file.
  	PeopleMatrix []map[int]int64
  	// PeopleFiles[i] lists, in ascending order, the indices into Files of the
  	// files developer i committed to.
  	PeopleFiles [][]int
  	// FilesMatrix[i][j] is the co-occurrence count recorded for Files[i] and
  	// Files[j].
  	FilesMatrix []map[int]int64
  	// Files is the sorted list of file names; the indices used by the other
  	// fields refer to positions in this slice.
  	Files []string
  }
  31. func (couples *CouplesAnalysis) Name() string {
  32. return "Couples"
  33. }
  34. func (couples *CouplesAnalysis) Provides() []string {
  35. return []string{}
  36. }
  37. func (couples *CouplesAnalysis) Requires() []string {
  38. arr := [...]string{"author", "changes"}
  39. return arr[:]
  40. }
  41. func (couples *CouplesAnalysis) ListConfigurationOptions() []ConfigurationOption {
  42. return []ConfigurationOption{}
  43. }
  44. func (couples *CouplesAnalysis) Configure(facts map[string]interface{}) {
  45. if val, exists := facts[FactIdentityDetectorPeopleCount].(int); exists {
  46. couples.PeopleNumber = val
  47. couples.reversedPeopleDict = facts[FactIdentityDetectorReversedPeopleDict].([]string)
  48. }
  49. }
  50. func (couples *CouplesAnalysis) Flag() string {
  51. return "couples"
  52. }
  53. func (couples *CouplesAnalysis) Initialize(repository *git.Repository) {
  54. couples.people = make([]map[string]int, couples.PeopleNumber+1)
  55. for i := range couples.people {
  56. couples.people[i] = map[string]int{}
  57. }
  58. couples.people_commits = make([]int, couples.PeopleNumber+1)
  59. couples.files = map[string]map[string]int{}
  60. }
  61. func (couples *CouplesAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  62. author := deps["author"].(int)
  63. if author == MISSING_AUTHOR {
  64. author = couples.PeopleNumber
  65. }
  66. couples.people_commits[author] += 1
  67. tree_diff := deps["changes"].(object.Changes)
  68. context := make([]string, 0)
  69. deleteFile := func(name string) {
  70. // we do not remove the file from people - the context does not expire
  71. delete(couples.files, name)
  72. for _, otherFiles := range couples.files {
  73. delete(otherFiles, name)
  74. }
  75. }
  76. for _, change := range tree_diff {
  77. action, err := change.Action()
  78. if err != nil {
  79. return nil, err
  80. }
  81. toName := change.To.Name
  82. fromName := change.From.Name
  83. switch action {
  84. case merkletrie.Insert:
  85. context = append(context, toName)
  86. couples.people[author][toName] += 1
  87. case merkletrie.Delete:
  88. deleteFile(fromName)
  89. couples.people[author][fromName] += 1
  90. case merkletrie.Modify:
  91. if fromName != toName {
  92. // renamed
  93. couples.files[toName] = couples.files[fromName]
  94. for _, otherFiles := range couples.files {
  95. val, exists := otherFiles[fromName]
  96. if exists {
  97. otherFiles[toName] = val
  98. }
  99. }
  100. deleteFile(fromName)
  101. for _, authorFiles := range couples.people {
  102. val, exists := authorFiles[fromName]
  103. if exists {
  104. authorFiles[toName] = val
  105. delete(authorFiles, fromName)
  106. }
  107. }
  108. }
  109. context = append(context, toName)
  110. couples.people[author][toName] += 1
  111. }
  112. }
  113. for _, file := range context {
  114. for _, otherFile := range context {
  115. lane, exists := couples.files[file]
  116. if !exists {
  117. lane = map[string]int{}
  118. couples.files[file] = lane
  119. }
  120. lane[otherFile] += 1
  121. }
  122. }
  123. return nil, nil
  124. }
  125. func (couples *CouplesAnalysis) Finalize() interface{} {
  126. filesSequence := make([]string, len(couples.files))
  127. i := 0
  128. for file := range couples.files {
  129. filesSequence[i] = file
  130. i++
  131. }
  132. sort.Strings(filesSequence)
  133. filesIndex := map[string]int{}
  134. for i, file := range filesSequence {
  135. filesIndex[file] = i
  136. }
  137. peopleMatrix := make([]map[int]int64, couples.PeopleNumber+1)
  138. peopleFiles := make([][]int, couples.PeopleNumber+1)
  139. for i := range peopleMatrix {
  140. peopleMatrix[i] = map[int]int64{}
  141. for file, commits := range couples.people[i] {
  142. fi, exists := filesIndex[file]
  143. if exists {
  144. peopleFiles[i] = append(peopleFiles[i], fi)
  145. }
  146. for j, otherFiles := range couples.people {
  147. otherCommits := otherFiles[file]
  148. delta := otherCommits
  149. if otherCommits > commits {
  150. delta = commits
  151. }
  152. if delta > 0 {
  153. peopleMatrix[i][j] += int64(delta)
  154. }
  155. }
  156. }
  157. sort.Ints(peopleFiles[i])
  158. }
  159. filesMatrix := make([]map[int]int64, len(filesIndex))
  160. for i := range filesMatrix {
  161. filesMatrix[i] = map[int]int64{}
  162. for otherFile, cooccs := range couples.files[filesSequence[i]] {
  163. filesMatrix[i][filesIndex[otherFile]] = int64(cooccs)
  164. }
  165. }
  166. return CouplesResult{
  167. PeopleMatrix: peopleMatrix, PeopleFiles: peopleFiles,
  168. Files: filesSequence, FilesMatrix: filesMatrix}
  169. }
  170. func (couples *CouplesAnalysis) Serialize(result interface{}, binary bool, writer io.Writer) error {
  171. couplesResult := result.(CouplesResult)
  172. if binary {
  173. return couples.serializeBinary(&couplesResult, writer)
  174. }
  175. couples.serializeText(&couplesResult, writer)
  176. return nil
  177. }
  178. func (couples *CouplesAnalysis) serializeText(result *CouplesResult, writer io.Writer) {
  179. fmt.Fprintln(writer, " files_coocc:")
  180. fmt.Fprintln(writer, " index:")
  181. for _, file := range result.Files {
  182. fmt.Fprintf(writer, " - %s\n", yaml.SafeString(file))
  183. }
  184. fmt.Fprintln(writer, " matrix:")
  185. for _, files := range result.FilesMatrix {
  186. fmt.Fprint(writer, " - {")
  187. indices := []int{}
  188. for file := range files {
  189. indices = append(indices, file)
  190. }
  191. sort.Ints(indices)
  192. for i, file := range indices {
  193. fmt.Fprintf(writer, "%d: %d", file, files[file])
  194. if i < len(indices)-1 {
  195. fmt.Fprint(writer, ", ")
  196. }
  197. }
  198. fmt.Fprintln(writer, "}")
  199. }
  200. fmt.Fprintln(writer, " people_coocc:")
  201. fmt.Fprintln(writer, " index:")
  202. for _, person := range couples.reversedPeopleDict {
  203. fmt.Fprintf(writer, " - %s\n", yaml.SafeString(person))
  204. }
  205. fmt.Fprintln(writer, " matrix:")
  206. for _, people := range result.PeopleMatrix {
  207. fmt.Fprint(writer, " - {")
  208. indices := []int{}
  209. for file := range people {
  210. indices = append(indices, file)
  211. }
  212. sort.Ints(indices)
  213. for i, person := range indices {
  214. fmt.Fprintf(writer, "%d: %d", person, people[person])
  215. if i < len(indices)-1 {
  216. fmt.Fprint(writer, ", ")
  217. }
  218. }
  219. fmt.Fprintln(writer, "}")
  220. }
  221. fmt.Fprintln(writer, " author_files:") // sorted by number of files each author changed
  222. peopleFiles := sortByNumberOfFiles(result.PeopleFiles, couples.reversedPeopleDict, result.Files)
  223. for _, authorFiles := range peopleFiles {
  224. fmt.Fprintf(writer, " - %s:\n", yaml.SafeString(authorFiles.Author))
  225. sort.Strings(authorFiles.Files)
  226. for _, file := range authorFiles.Files {
  227. fmt.Fprintf(writer, " - %s\n", yaml.SafeString(file)) // sorted by path
  228. }
  229. }
  230. }
  231. func sortByNumberOfFiles(
  232. peopleFiles [][]int, peopleDict []string, filesDict []string) authorFilesList {
  233. var pfl authorFilesList
  234. for peopleIdx, files := range peopleFiles {
  235. if peopleIdx < len(peopleDict) {
  236. fileNames := make([]string, len(files))
  237. for i, fi := range files {
  238. fileNames[i] = filesDict[fi]
  239. }
  240. pfl = append(pfl, authorFiles{peopleDict[peopleIdx], fileNames})
  241. }
  242. }
  243. sort.Sort(pfl)
  244. return pfl
  245. }
  // authorFiles pairs a developer name with the names of the files
  // attributed to that developer.
  type authorFiles struct {
  	Author string
  	Files  []string
  }

  // authorFilesList implements sort.Interface, ordering the entries by
  // ascending number of files.
  type authorFilesList []authorFiles

  // Len reports the number of entries.
  func (s authorFilesList) Len() int {
  	count := len(s)
  	return count
  }

  // Swap exchanges the entries at positions i and j.
  func (s authorFilesList) Swap(i, j int) {
  	held := s[i]
  	s[i] = s[j]
  	s[j] = held
  }

  // Less reports whether entry i has strictly fewer files than entry j.
  func (s authorFilesList) Less(i, j int) bool {
  	left, right := len(s[i].Files), len(s[j].Files)
  	return left < right
  }
  260. func (couples *CouplesAnalysis) serializeBinary(result *CouplesResult, writer io.Writer) error {
  261. message := pb.CouplesAnalysisResults{}
  262. message.FileCouples = &pb.Couples{
  263. Index: result.Files,
  264. Matrix: pb.MapToCompressedSparseRowMatrix(result.FilesMatrix),
  265. }
  266. message.DeveloperCouples = &pb.Couples{
  267. Index: couples.reversedPeopleDict,
  268. Matrix: pb.MapToCompressedSparseRowMatrix(result.PeopleMatrix),
  269. }
  270. message.TouchedFiles = &pb.DeveloperTouchedFiles{
  271. Developers: make([]*pb.TouchedFiles, len(couples.reversedPeopleDict)),
  272. }
  273. for key := range couples.reversedPeopleDict {
  274. files := result.PeopleFiles[key]
  275. int32Files := make([]int32, len(files))
  276. for i, f := range files {
  277. int32Files[i] = int32(f)
  278. }
  279. message.TouchedFiles.Developers[key] = &pb.TouchedFiles{
  280. Files: int32Files,
  281. }
  282. }
  283. serialized, err := proto.Marshal(&message)
  284. if err != nil {
  285. return err
  286. }
  287. writer.Write(serialized)
  288. return nil
  289. }
  290. func init() {
  291. Registry.Register(&CouplesAnalysis{})
  292. }