couples.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439
  1. package hercules
  2. import (
  3. "fmt"
  4. "io"
  5. "sort"
  6. "github.com/gogo/protobuf/proto"
  7. "gopkg.in/src-d/go-git.v4"
  8. "gopkg.in/src-d/go-git.v4/plumbing/object"
  9. "gopkg.in/src-d/go-git.v4/utils/merkletrie"
  10. "gopkg.in/src-d/hercules.v3/pb"
  11. "gopkg.in/src-d/hercules.v3/yaml"
  12. )
  // CouplesAnalysis accumulates, commit by commit, how often files change together
  // and how often every developer touches every file.
  type CouplesAnalysis struct {
  	// PeopleNumber is the number of developers for which the matrix is built.
  	// 0 disables this analysis.
  	PeopleNumber int

  	// people holds, per developer index, the number of times that developer
  	// committed to each file (keyed by file name).
  	people []map[string]int
  	// people_commits holds the number of commits made by each author.
  	people_commits []int
  	// files holds, for every file, how many times each other file occurred
  	// in the same commit with it.
  	files map[string]map[string]int
  	// reversedPeopleDict references IdentityDetector.ReversedPeopleDict.
  	reversedPeopleDict []string
  }
  // CouplesResult is the final product of CouplesAnalysis: sparse co-occurrence
  // matrices plus the index needed to interpret their rows and columns.
  type CouplesResult struct {
  	// PeopleMatrix is a sparse matrix, one row per developer slot.
  	PeopleMatrix []map[int]int64
  	// PeopleFiles lists, per developer slot, indices into Files.
  	PeopleFiles [][]int
  	// FilesMatrix is a sparse matrix, one row per entry of Files.
  	FilesMatrix []map[int]int64
  	// Files is the file name index shared by FilesMatrix and PeopleFiles.
  	Files []string

  	// reversedPeopleDict references IdentityDetector.ReversedPeopleDict.
  	reversedPeopleDict []string
  }
  33. func (couples *CouplesAnalysis) Name() string {
  34. return "Couples"
  35. }
  36. func (couples *CouplesAnalysis) Provides() []string {
  37. return []string{}
  38. }
  39. func (couples *CouplesAnalysis) Requires() []string {
  40. arr := [...]string{DependencyAuthor, DependencyTreeChanges}
  41. return arr[:]
  42. }
  43. func (couples *CouplesAnalysis) ListConfigurationOptions() []ConfigurationOption {
  44. return []ConfigurationOption{}
  45. }
  46. func (couples *CouplesAnalysis) Configure(facts map[string]interface{}) {
  47. if val, exists := facts[FactIdentityDetectorPeopleCount].(int); exists {
  48. couples.PeopleNumber = val
  49. couples.reversedPeopleDict = facts[FactIdentityDetectorReversedPeopleDict].([]string)
  50. }
  51. }
  52. func (couples *CouplesAnalysis) Flag() string {
  53. return "couples"
  54. }
  55. func (couples *CouplesAnalysis) Initialize(repository *git.Repository) {
  56. couples.people = make([]map[string]int, couples.PeopleNumber+1)
  57. for i := range couples.people {
  58. couples.people[i] = map[string]int{}
  59. }
  60. couples.people_commits = make([]int, couples.PeopleNumber+1)
  61. couples.files = map[string]map[string]int{}
  62. }
  63. func (couples *CouplesAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  64. author := deps[DependencyAuthor].(int)
  65. if author == MISSING_AUTHOR {
  66. author = couples.PeopleNumber
  67. }
  68. couples.people_commits[author] += 1
  69. tree_diff := deps[DependencyTreeChanges].(object.Changes)
  70. context := make([]string, 0)
  71. deleteFile := func(name string) {
  72. // we do not remove the file from people - the context does not expire
  73. delete(couples.files, name)
  74. for _, otherFiles := range couples.files {
  75. delete(otherFiles, name)
  76. }
  77. }
  78. for _, change := range tree_diff {
  79. action, err := change.Action()
  80. if err != nil {
  81. return nil, err
  82. }
  83. toName := change.To.Name
  84. fromName := change.From.Name
  85. switch action {
  86. case merkletrie.Insert:
  87. context = append(context, toName)
  88. couples.people[author][toName] += 1
  89. case merkletrie.Delete:
  90. deleteFile(fromName)
  91. couples.people[author][fromName] += 1
  92. case merkletrie.Modify:
  93. if fromName != toName {
  94. // renamed
  95. couples.files[toName] = couples.files[fromName]
  96. for _, otherFiles := range couples.files {
  97. val, exists := otherFiles[fromName]
  98. if exists {
  99. otherFiles[toName] = val
  100. }
  101. }
  102. deleteFile(fromName)
  103. for _, authorFiles := range couples.people {
  104. val, exists := authorFiles[fromName]
  105. if exists {
  106. authorFiles[toName] = val
  107. delete(authorFiles, fromName)
  108. }
  109. }
  110. }
  111. context = append(context, toName)
  112. couples.people[author][toName] += 1
  113. }
  114. }
  115. for _, file := range context {
  116. for _, otherFile := range context {
  117. lane, exists := couples.files[file]
  118. if !exists {
  119. lane = map[string]int{}
  120. couples.files[file] = lane
  121. }
  122. lane[otherFile] += 1
  123. }
  124. }
  125. return nil, nil
  126. }
  127. func (couples *CouplesAnalysis) Finalize() interface{} {
  128. filesSequence := make([]string, len(couples.files))
  129. i := 0
  130. for file := range couples.files {
  131. filesSequence[i] = file
  132. i++
  133. }
  134. sort.Strings(filesSequence)
  135. filesIndex := map[string]int{}
  136. for i, file := range filesSequence {
  137. filesIndex[file] = i
  138. }
  139. peopleMatrix := make([]map[int]int64, couples.PeopleNumber+1)
  140. peopleFiles := make([][]int, couples.PeopleNumber+1)
  141. for i := range peopleMatrix {
  142. peopleMatrix[i] = map[int]int64{}
  143. for file, commits := range couples.people[i] {
  144. fi, exists := filesIndex[file]
  145. if exists {
  146. peopleFiles[i] = append(peopleFiles[i], fi)
  147. }
  148. for j, otherFiles := range couples.people {
  149. otherCommits := otherFiles[file]
  150. delta := otherCommits
  151. if otherCommits > commits {
  152. delta = commits
  153. }
  154. if delta > 0 {
  155. peopleMatrix[i][j] += int64(delta)
  156. }
  157. }
  158. }
  159. sort.Ints(peopleFiles[i])
  160. }
  161. filesMatrix := make([]map[int]int64, len(filesIndex))
  162. for i := range filesMatrix {
  163. filesMatrix[i] = map[int]int64{}
  164. for otherFile, cooccs := range couples.files[filesSequence[i]] {
  165. filesMatrix[i][filesIndex[otherFile]] = int64(cooccs)
  166. }
  167. }
  168. return CouplesResult{
  169. PeopleMatrix: peopleMatrix,
  170. PeopleFiles: peopleFiles,
  171. Files: filesSequence,
  172. FilesMatrix: filesMatrix,
  173. reversedPeopleDict: couples.reversedPeopleDict,
  174. }
  175. }
  176. func (couples *CouplesAnalysis) Serialize(result interface{}, binary bool, writer io.Writer) error {
  177. couplesResult := result.(CouplesResult)
  178. if binary {
  179. return couples.serializeBinary(&couplesResult, writer)
  180. }
  181. couples.serializeText(&couplesResult, writer)
  182. return nil
  183. }
  184. func (couples *CouplesAnalysis) Deserialize(pbmessage []byte) (interface{}, error) {
  185. message := pb.CouplesAnalysisResults{}
  186. err := proto.Unmarshal(pbmessage, &message)
  187. if err != nil {
  188. return nil, err
  189. }
  190. result := CouplesResult{
  191. Files: message.FileCouples.Index,
  192. FilesMatrix: make([]map[int]int64, message.FileCouples.Matrix.NumberOfRows),
  193. PeopleFiles: make([][]int, len(message.PeopleCouples.Index)),
  194. PeopleMatrix: make([]map[int]int64, message.PeopleCouples.Matrix.NumberOfRows),
  195. reversedPeopleDict: message.PeopleCouples.Index,
  196. }
  197. for i, files := range message.PeopleFiles {
  198. result.PeopleFiles[i] = make([]int, len(files.Files))
  199. for j, val := range files.Files {
  200. result.PeopleFiles[i][j] = int(val)
  201. }
  202. }
  203. convertCSR := func(dest []map[int]int64, src *pb.CompressedSparseRowMatrix) {
  204. for indptr := range src.Indptr {
  205. if indptr == 0 {
  206. continue
  207. }
  208. dest[indptr-1] = map[int]int64{}
  209. for j := src.Indptr[indptr-1]; j < src.Indptr[indptr]; j++ {
  210. dest[indptr-1][int(src.Indices[j])] = src.Data[j]
  211. }
  212. }
  213. }
  214. convertCSR(result.FilesMatrix, message.FileCouples.Matrix)
  215. convertCSR(result.PeopleMatrix, message.PeopleCouples.Matrix)
  216. return result, nil
  217. }
  218. func (couples *CouplesAnalysis) MergeResults(r1, r2 interface{}, c1, c2 *CommonAnalysisResult) interface{} {
  219. cr1 := r1.(CouplesResult)
  220. cr2 := r2.(CouplesResult)
  221. merged := CouplesResult{}
  222. var people, files map[string][3]int
  223. people, merged.reversedPeopleDict = IdentityDetector{}.MergeReversedDicts(
  224. cr1.reversedPeopleDict, cr2.reversedPeopleDict)
  225. files, merged.Files = IdentityDetector{}.MergeReversedDicts(cr1.Files, cr2.Files)
  226. merged.PeopleFiles = make([][]int, len(merged.reversedPeopleDict))
  227. peopleFilesDicts := make([]map[int]bool, len(merged.reversedPeopleDict))
  228. addPeopleFiles := func(peopleFiles [][]int, reversedPeopleDict []string,
  229. reversedFilesDict []string) {
  230. for pi, fs := range peopleFiles {
  231. idx := people[reversedPeopleDict[pi]][0]
  232. m := peopleFilesDicts[idx]
  233. if m == nil {
  234. m = map[int]bool{}
  235. peopleFilesDicts[idx] = m
  236. }
  237. for _, f := range fs {
  238. m[files[reversedFilesDict[f]][0]] = true
  239. }
  240. }
  241. }
  242. addPeopleFiles(cr1.PeopleFiles, cr1.reversedPeopleDict, cr1.Files)
  243. addPeopleFiles(cr2.PeopleFiles, cr2.reversedPeopleDict, cr2.Files)
  244. for i, m := range peopleFilesDicts {
  245. merged.PeopleFiles[i] = make([]int, len(m))
  246. j := 0
  247. for f := range m {
  248. merged.PeopleFiles[i][j] = f
  249. j++
  250. }
  251. sort.Ints(merged.PeopleFiles[i])
  252. }
  253. merged.PeopleMatrix = make([]map[int]int64, len(merged.reversedPeopleDict)+1)
  254. addPeople := func(peopleMatrix []map[int]int64, reversedPeopleDict []string,
  255. reversedFilesDict []string) {
  256. for pi, pc := range peopleMatrix {
  257. var idx int
  258. if pi < len(reversedPeopleDict) {
  259. idx = people[reversedPeopleDict[pi]][0]
  260. } else {
  261. idx = len(merged.reversedPeopleDict)
  262. }
  263. m := merged.PeopleMatrix[idx]
  264. if m == nil {
  265. m = map[int]int64{}
  266. merged.PeopleMatrix[idx] = m
  267. }
  268. for file, val := range pc {
  269. m[files[reversedFilesDict[file]][0]] += val
  270. }
  271. }
  272. }
  273. addPeople(cr1.PeopleMatrix, cr1.reversedPeopleDict, cr1.Files)
  274. addPeople(cr2.PeopleMatrix, cr2.reversedPeopleDict, cr2.Files)
  275. merged.FilesMatrix = make([]map[int]int64, len(merged.Files))
  276. addFiles := func(filesMatrix []map[int]int64, reversedFilesDict []string) {
  277. for fi, fc := range filesMatrix {
  278. idx := people[reversedFilesDict[fi]][0]
  279. m := merged.FilesMatrix[idx]
  280. if m == nil {
  281. m = map[int]int64{}
  282. merged.FilesMatrix[idx] = m
  283. }
  284. for file, val := range fc {
  285. m[files[reversedFilesDict[file]][0]] += val
  286. }
  287. }
  288. }
  289. addFiles(cr1.FilesMatrix, cr1.Files)
  290. addFiles(cr2.FilesMatrix, cr2.Files)
  291. return merged
  292. }
  293. func (couples *CouplesAnalysis) serializeText(result *CouplesResult, writer io.Writer) {
  294. fmt.Fprintln(writer, " files_coocc:")
  295. fmt.Fprintln(writer, " index:")
  296. for _, file := range result.Files {
  297. fmt.Fprintf(writer, " - %s\n", yaml.SafeString(file))
  298. }
  299. fmt.Fprintln(writer, " matrix:")
  300. for _, files := range result.FilesMatrix {
  301. fmt.Fprint(writer, " - {")
  302. indices := []int{}
  303. for file := range files {
  304. indices = append(indices, file)
  305. }
  306. sort.Ints(indices)
  307. for i, file := range indices {
  308. fmt.Fprintf(writer, "%d: %d", file, files[file])
  309. if i < len(indices)-1 {
  310. fmt.Fprint(writer, ", ")
  311. }
  312. }
  313. fmt.Fprintln(writer, "}")
  314. }
  315. fmt.Fprintln(writer, " people_coocc:")
  316. fmt.Fprintln(writer, " index:")
  317. for _, person := range couples.reversedPeopleDict {
  318. fmt.Fprintf(writer, " - %s\n", yaml.SafeString(person))
  319. }
  320. fmt.Fprintln(writer, " matrix:")
  321. for _, people := range result.PeopleMatrix {
  322. fmt.Fprint(writer, " - {")
  323. indices := []int{}
  324. for file := range people {
  325. indices = append(indices, file)
  326. }
  327. sort.Ints(indices)
  328. for i, person := range indices {
  329. fmt.Fprintf(writer, "%d: %d", person, people[person])
  330. if i < len(indices)-1 {
  331. fmt.Fprint(writer, ", ")
  332. }
  333. }
  334. fmt.Fprintln(writer, "}")
  335. }
  336. fmt.Fprintln(writer, " author_files:") // sorted by number of files each author changed
  337. peopleFiles := sortByNumberOfFiles(result.PeopleFiles, couples.reversedPeopleDict, result.Files)
  338. for _, authorFiles := range peopleFiles {
  339. fmt.Fprintf(writer, " - %s:\n", yaml.SafeString(authorFiles.Author))
  340. sort.Strings(authorFiles.Files)
  341. for _, file := range authorFiles.Files {
  342. fmt.Fprintf(writer, " - %s\n", yaml.SafeString(file)) // sorted by path
  343. }
  344. }
  345. }
  // sortByNumberOfFiles resolves the file indices of every developer to file
  // names and returns the developers ordered by the number of their files,
  // ascending. Rows of peopleFiles which have no name in peopleDict are skipped.
  // The result is nil when there is nothing to return.
  func sortByNumberOfFiles(
  	peopleFiles [][]int, peopleDict []string, filesDict []string) authorFilesList {
  	named := len(peopleFiles)
  	if len(peopleDict) < named {
  		named = len(peopleDict)
  	}
  	var pfl authorFilesList
  	for person := 0; person < named; person++ {
  		indices := peopleFiles[person]
  		names := make([]string, 0, len(indices))
  		for _, fi := range indices {
  			names = append(names, filesDict[fi])
  		}
  		pfl = append(pfl, authorFiles{Author: peopleDict[person], Files: names})
  	}
  	sort.Sort(pfl)
  	return pfl
  }

  // authorFiles couples a developer name with the names of the files they touched.
  type authorFiles struct {
  	Author string
  	Files  []string
  }

  // authorFilesList implements sort.Interface, ordering by the number of files.
  type authorFilesList []authorFiles

  // Len returns the number of elements in the list.
  func (s authorFilesList) Len() int {
  	return len(s)
  }

  // Swap exchanges the elements at positions i and j.
  func (s authorFilesList) Swap(i, j int) {
  	s[j], s[i] = s[i], s[j]
  }

  // Less reports whether element i has fewer files than element j.
  func (s authorFilesList) Less(i, j int) bool {
  	return len(s[j].Files) > len(s[i].Files)
  }
  375. func (couples *CouplesAnalysis) serializeBinary(result *CouplesResult, writer io.Writer) error {
  376. message := pb.CouplesAnalysisResults{}
  377. message.FileCouples = &pb.Couples{
  378. Index: result.Files,
  379. Matrix: pb.MapToCompressedSparseRowMatrix(result.FilesMatrix),
  380. }
  381. message.PeopleCouples = &pb.Couples{
  382. Index: result.reversedPeopleDict,
  383. Matrix: pb.MapToCompressedSparseRowMatrix(result.PeopleMatrix),
  384. }
  385. message.PeopleFiles = make([]*pb.TouchedFiles, len(result.reversedPeopleDict))
  386. for key := range result.reversedPeopleDict {
  387. files := result.PeopleFiles[key]
  388. int32Files := make([]int32, len(files))
  389. for i, f := range files {
  390. int32Files[i] = int32(f)
  391. }
  392. message.PeopleFiles[key] = &pb.TouchedFiles{
  393. Files: int32Files,
  394. }
  395. }
  396. serialized, err := proto.Marshal(&message)
  397. if err != nil {
  398. return err
  399. }
  400. writer.Write(serialized)
  401. return nil
  402. }
  403. func init() {
  404. Registry.Register(&CouplesAnalysis{})
  405. }