couples.go 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. package hercules
  2. import (
  3. "sort"
  4. "gopkg.in/src-d/go-git.v4"
  5. "gopkg.in/src-d/go-git.v4/plumbing/object"
  6. "gopkg.in/src-d/go-git.v4/utils/merkletrie"
  7. )
  // Couples accumulates, commit by commit, which files change together and
  // which files each developer has touched.
  type Couples struct {
  	// PeopleNumber is the number of developers for which to build the matrix.
  	// 0 disables this analysis. The per-developer slices below are allocated
  	// with PeopleNumber+1 slots; the extra last slot is used for commits
  	// whose author is MISSING_AUTHOR.
  	PeopleNumber int

  	// people[dev][file] is how many times developer dev changed file
  	// (inserted or modified it) across all consumed commits.
  	people []map[string]int

  	// people_commits[dev] is the number of commits consumed for developer dev.
  	people_commits []int

  	// files[a][b] is how many commits changed both file a and file b.
  	// files[a][a] therefore counts the commits that changed a.
  	files map[string]map[string]int
  }
  // CouplesResult is the value produced by (*Couples).Finalize.
  type CouplesResult struct {
  	// PeopleMatrix[i][j], for i != j, is the sum over the files touched by
  	// developer i of min(changes by i, changes by j). PeopleMatrix[i][i] is
  	// the number of commits made by developer i.
  	PeopleMatrix []map[int]int64

  	// PeopleFiles[i] lists the names of the files developer i has changed.
  	PeopleFiles [][]string

  	// FilesMatrix[i][j] is the number of commits in which Files[i] and
  	// Files[j] were changed together; both indices refer to Files.
  	FilesMatrix []map[int]int

  	// Files holds the tracked file names in ascending lexical order.
  	Files []string
  }
  24. func (couples *Couples) Name() string {
  25. return "Couples"
  26. }
  27. func (couples *Couples) Provides() []string {
  28. return []string{}
  29. }
  30. func (couples *Couples) Requires() []string {
  31. arr := [...]string{"author", "renamed_changes"}
  32. return arr[:]
  33. }
  34. func (couples *Couples) Initialize(repository *git.Repository) {
  35. couples.people = make([]map[string]int, couples.PeopleNumber+1)
  36. for i := range couples.people {
  37. couples.people[i] = map[string]int{}
  38. }
  39. couples.people_commits = make([]int, couples.PeopleNumber+1)
  40. couples.files = map[string]map[string]int{}
  41. }
  42. func (couples *Couples) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  43. author := deps["author"].(int)
  44. if author == MISSING_AUTHOR {
  45. author = couples.PeopleNumber
  46. }
  47. couples.people_commits[author] += 1
  48. tree_diff := deps["renamed_changes"].(object.Changes)
  49. context := make([]string, 0)
  50. deleteFile := func(name string) {
  51. // we do not remove the file from people - the context does not expire
  52. delete(couples.files, name)
  53. for _, otherFiles := range couples.files {
  54. delete(otherFiles, name)
  55. }
  56. }
  57. for _, change := range tree_diff {
  58. action, err := change.Action()
  59. if err != nil {
  60. return nil, err
  61. }
  62. switch action {
  63. case merkletrie.Insert:
  64. context = append(context, change.To.Name)
  65. couples.people[author][change.To.Name] += 1
  66. case merkletrie.Delete:
  67. deleteFile(change.From.Name)
  68. case merkletrie.Modify:
  69. toName := change.To.Name
  70. fromName := change.From.Name
  71. if fromName != toName {
  72. // renamed
  73. couples.files[toName] = couples.files[fromName]
  74. for _, otherFiles := range couples.files {
  75. val, exists := otherFiles[fromName]
  76. if exists {
  77. otherFiles[toName] = val
  78. }
  79. }
  80. deleteFile(change.From.Name)
  81. }
  82. context = append(context, toName)
  83. couples.people[author][toName] += 1
  84. }
  85. }
  86. for _, file := range context {
  87. for _, otherFile := range context {
  88. lane, exists := couples.files[file]
  89. if !exists {
  90. lane = map[string]int{}
  91. couples.files[file] = lane
  92. }
  93. lane[otherFile] += 1
  94. }
  95. }
  96. return nil, nil
  97. }
  98. func (couples *Couples) Finalize() interface{} {
  99. peopleMatrix := make([]map[int]int64, couples.PeopleNumber+1)
  100. peopleFiles := make([][]string, couples.PeopleNumber+1)
  101. for i := range peopleMatrix {
  102. peopleMatrix[i] = map[int]int64{}
  103. for file, commits := range couples.people[i] {
  104. //could be normalized further, by replacing file with idx in fileSequence
  105. //but this would trade the space for readability of result
  106. peopleFiles[i] = append(peopleFiles[i], file)
  107. for j, otherFiles := range couples.people {
  108. if i == j {
  109. continue
  110. }
  111. otherCommits := otherFiles[file]
  112. delta := otherCommits
  113. if otherCommits > commits {
  114. delta = commits
  115. }
  116. if delta > 0 {
  117. peopleMatrix[i][j] += int64(delta)
  118. }
  119. }
  120. }
  121. peopleMatrix[i][i] = int64(couples.people_commits[i])
  122. }
  123. filesSequence := make([]string, len(couples.files))
  124. i := 0
  125. for file := range couples.files {
  126. filesSequence[i] = file
  127. i++
  128. }
  129. sort.Strings(filesSequence)
  130. filesIndex := map[string]int{}
  131. for i, file := range filesSequence {
  132. filesIndex[file] = i
  133. }
  134. filesMatrix := make([]map[int]int, len(filesIndex))
  135. for i := range filesMatrix {
  136. filesMatrix[i] = map[int]int{}
  137. for otherFile, cooccs := range couples.files[filesSequence[i]] {
  138. filesMatrix[i][filesIndex[otherFile]] = cooccs
  139. }
  140. }
  141. return CouplesResult{PeopleMatrix: peopleMatrix, PeopleFiles: peopleFiles, Files: filesSequence, FilesMatrix: filesMatrix}
  142. }