couples.go 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. package hercules
  2. import (
  3. "sort"
  4. "gopkg.in/src-d/go-git.v4"
  5. "gopkg.in/src-d/go-git.v4/plumbing/object"
  6. "gopkg.in/src-d/go-git.v4/utils/merkletrie"
  7. )
  // Couples gathers coupling statistics while commits are consumed: how often
  // each developer touched each file and how often files changed together.
  type Couples struct {
  	// PeopleNumber is the number of developers for which to build the matrix.
  	// 0 disables this analysis.
  	PeopleNumber int

  	// people is indexed by developer; people[i][name] is the number of times
  	// developer i committed to the file called name.
  	people []map[string]int

  	// people_commits is indexed by developer; people_commits[i] is the number
  	// of commits made by developer i.
  	people_commits []int

  	// files[a][b] is the number of commits in which file a occurred together
  	// with file b.
  	files map[string]map[string]int
  }
  // CouplesResult is the value produced by Couples.Finalize.
  type CouplesResult struct {
  	// PeopleMatrix[i][j] sums, over the files developer i committed to, the
  	// smaller of the two developers' commit counts for that file. Pairs with
  	// no shared file have no entry.
  	PeopleMatrix []map[int]int64

  	// PeopleFiles[i] holds, in ascending order, the indices into Files of the
  	// files developer i committed to.
  	PeopleFiles [][]int

  	// FilesMatrix[i][j] is the number of commits in which Files[i] occurred
  	// together with Files[j].
  	FilesMatrix []map[int]int64

  	// Files is the sorted list of file names; the position of a name is the
  	// index used by PeopleFiles and FilesMatrix.
  	Files []string
  }
  24. func (couples *Couples) Name() string {
  25. return "Couples"
  26. }
  27. func (couples *Couples) Provides() []string {
  28. return []string{}
  29. }
  30. func (couples *Couples) Requires() []string {
  31. arr := [...]string{"author", "renamed_changes"}
  32. return arr[:]
  33. }
  34. func (couples *Couples) Initialize(repository *git.Repository) {
  35. couples.people = make([]map[string]int, couples.PeopleNumber+1)
  36. for i := range couples.people {
  37. couples.people[i] = map[string]int{}
  38. }
  39. couples.people_commits = make([]int, couples.PeopleNumber+1)
  40. couples.files = map[string]map[string]int{}
  41. }
  42. func (couples *Couples) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  43. author := deps["author"].(int)
  44. if author == MISSING_AUTHOR {
  45. author = couples.PeopleNumber
  46. }
  47. couples.people_commits[author] += 1
  48. tree_diff := deps["renamed_changes"].(object.Changes)
  49. context := make([]string, 0)
  50. deleteFile := func(name string) {
  51. // we do not remove the file from people - the context does not expire
  52. delete(couples.files, name)
  53. for _, otherFiles := range couples.files {
  54. delete(otherFiles, name)
  55. }
  56. }
  57. for _, change := range tree_diff {
  58. action, err := change.Action()
  59. if err != nil {
  60. return nil, err
  61. }
  62. toName := change.To.Name
  63. fromName := change.From.Name
  64. switch action {
  65. case merkletrie.Insert:
  66. context = append(context, toName)
  67. couples.people[author][toName] += 1
  68. case merkletrie.Delete:
  69. deleteFile(fromName)
  70. couples.people[author][fromName] += 1
  71. case merkletrie.Modify:
  72. if fromName != toName {
  73. // renamed
  74. couples.files[toName] = couples.files[fromName]
  75. for _, otherFiles := range couples.files {
  76. val, exists := otherFiles[fromName]
  77. if exists {
  78. otherFiles[toName] = val
  79. }
  80. }
  81. deleteFile(fromName)
  82. for _, authorFiles := range couples.people {
  83. val, exists := authorFiles[fromName]
  84. if exists {
  85. authorFiles[toName] = val
  86. delete(authorFiles, fromName)
  87. }
  88. }
  89. }
  90. context = append(context, toName)
  91. couples.people[author][toName] += 1
  92. }
  93. }
  94. for _, file := range context {
  95. for _, otherFile := range context {
  96. lane, exists := couples.files[file]
  97. if !exists {
  98. lane = map[string]int{}
  99. couples.files[file] = lane
  100. }
  101. lane[otherFile] += 1
  102. }
  103. }
  104. return nil, nil
  105. }
  106. func (couples *Couples) Finalize() interface{} {
  107. filesSequence := make([]string, len(couples.files))
  108. i := 0
  109. for file := range couples.files {
  110. filesSequence[i] = file
  111. i++
  112. }
  113. sort.Strings(filesSequence)
  114. filesIndex := map[string]int{}
  115. for i, file := range filesSequence {
  116. filesIndex[file] = i
  117. }
  118. peopleMatrix := make([]map[int]int64, couples.PeopleNumber+1)
  119. peopleFiles := make([][]int, couples.PeopleNumber+1)
  120. for i := range peopleMatrix {
  121. peopleMatrix[i] = map[int]int64{}
  122. for file, commits := range couples.people[i] {
  123. fi, exists := filesIndex[file]
  124. if exists {
  125. peopleFiles[i] = append(peopleFiles[i], fi)
  126. }
  127. for j, otherFiles := range couples.people {
  128. otherCommits := otherFiles[file]
  129. delta := otherCommits
  130. if otherCommits > commits {
  131. delta = commits
  132. }
  133. if delta > 0 {
  134. peopleMatrix[i][j] += int64(delta)
  135. }
  136. }
  137. }
  138. sort.Ints(peopleFiles[i])
  139. }
  140. filesMatrix := make([]map[int]int64, len(filesIndex))
  141. for i := range filesMatrix {
  142. filesMatrix[i] = map[int]int64{}
  143. for otherFile, cooccs := range couples.files[filesSequence[i]] {
  144. filesMatrix[i][filesIndex[otherFile]] = int64(cooccs)
  145. }
  146. }
  147. return CouplesResult{
  148. PeopleMatrix: peopleMatrix, PeopleFiles: peopleFiles,
  149. Files: filesSequence, FilesMatrix: filesMatrix}
  150. }