couples.go 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670
  1. package leaves
  2. import (
  3. "fmt"
  4. "io"
  5. "sort"
  6. "github.com/gogo/protobuf/proto"
  7. "gopkg.in/src-d/go-git.v4"
  8. "gopkg.in/src-d/go-git.v4/plumbing/object"
  9. "gopkg.in/src-d/go-git.v4/utils/merkletrie"
  10. "gopkg.in/src-d/hercules.v10/internal/core"
  11. "gopkg.in/src-d/hercules.v10/internal/pb"
  12. items "gopkg.in/src-d/hercules.v10/internal/plumbing"
  13. "gopkg.in/src-d/hercules.v10/internal/plumbing/identity"
  14. "gopkg.in/src-d/hercules.v10/internal/yaml"
  15. )
  16. // CouplesAnalysis calculates the number of common commits for files and authors.
  17. // The results are matrices, where cell at row X and column Y is the number of commits which
  18. // changed X and Y together. In case with people, the numbers are summed for every common file.
  19. type CouplesAnalysis struct {
  20. core.NoopMerger
  21. core.OneShotMergeProcessor
  22. // PeopleNumber is the number of developers for which to build the matrix. 0 disables this analysis.
  23. PeopleNumber int
  24. // people store how many times every developer committed to every file.
  25. people []map[string]int
  26. // peopleCommits is the number of commits each author made.
  27. peopleCommits []int
  28. // files store every file occurred in the same commit with every other file.
  29. files map[string]map[string]int
  30. // renames point from new file name to old file name.
  31. renames *[]rename
  32. // lastCommit is the last commit which was consumed.
  33. lastCommit *object.Commit
  34. // reversedPeopleDict references IdentityDetector.ReversedPeopleDict
  35. reversedPeopleDict []string
  36. l core.Logger
  37. }
  // CouplesResult is what CouplesAnalysis.Finalize() returns: the coupling matrices
  // for developers and for files, plus the dictionaries needed to interpret them.
  type CouplesResult struct {
  	// PeopleMatrix tells how many times developers changed files which were also changed by
  	// other developers. The row is the developer, the key inside the row is the other
  	// developer, and the value is the sum over all the files both of them changed. Each term
  	// of that sum is min(C1, C2), where Ci is the number of commits by developer i which
  	// touched the file.
  	PeopleMatrix []map[int]int64
  	// PeopleFiles lists, for every developer (outer slice), the indexes of the files
  	// which that developer changed (inner slice).
  	PeopleFiles [][]int
  	// FilesMatrix tells how many times each pair of files occurred in the same commit.
  	FilesMatrix []map[int]int64
  	// FilesLines is the number of lines in each file as of the last analyzed commit.
  	FilesLines []int
  	// Files is the list of file names. Its order defines the file indexes used by
  	// PeopleFiles and FilesMatrix.
  	Files []string

  	// reversedPeopleDict references IdentityDetector.ReversedPeopleDict.
  	reversedPeopleDict []string
  }
  // CouplesMaximumMeaningfulContextSize is the largest number of files in a single commit
  // for which the files are still considered to be grouped together.
  const CouplesMaximumMeaningfulContextSize = 1000
  // rename records a single file rename: the path before and the path after.
  type rename struct {
  	// FromName is the old path of the file.
  	FromName string
  	// ToName is the new path of the file.
  	ToName string
  }
  66. // Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
  67. func (couples *CouplesAnalysis) Name() string {
  68. return "Couples"
  69. }
  70. // Provides returns the list of names of entities which are produced by this PipelineItem.
  71. // Each produced entity will be inserted into `deps` of dependent Consume()-s according
  72. // to this list. Also used by core.Registry to build the global map of providers.
  73. func (couples *CouplesAnalysis) Provides() []string {
  74. return []string{}
  75. }
  76. // Requires returns the list of names of entities which are needed by this PipelineItem.
  77. // Each requested entity will be inserted into `deps` of Consume(). In turn, those
  78. // entities are Provides() upstream.
  79. func (couples *CouplesAnalysis) Requires() []string {
  80. arr := [...]string{identity.DependencyAuthor, items.DependencyTreeChanges}
  81. return arr[:]
  82. }
  83. // ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
  84. func (couples *CouplesAnalysis) ListConfigurationOptions() []core.ConfigurationOption {
  85. return []core.ConfigurationOption{}
  86. }
  87. // Configure sets the properties previously published by ListConfigurationOptions().
  88. func (couples *CouplesAnalysis) Configure(facts map[string]interface{}) error {
  89. if l, exists := facts[core.ConfigLogger].(core.Logger); exists {
  90. couples.l = l
  91. }
  92. if val, exists := facts[identity.FactIdentityDetectorPeopleCount].(int); exists {
  93. couples.PeopleNumber = val
  94. couples.reversedPeopleDict = facts[identity.FactIdentityDetectorReversedPeopleDict].([]string)
  95. }
  96. return nil
  97. }
  98. // Flag for the command line switch which enables this analysis.
  99. func (couples *CouplesAnalysis) Flag() string {
  100. return "couples"
  101. }
  102. // Description returns the text which explains what the analysis is doing.
  103. func (couples *CouplesAnalysis) Description() string {
  104. return "The result is a square matrix, the value in each cell corresponds to the number " +
  105. "of times the pair of files appeared in the same commit or pair of developers " +
  106. "committed to the same file."
  107. }
  108. // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
  109. // calls. The repository which is going to be analysed is supplied as an argument.
  110. func (couples *CouplesAnalysis) Initialize(repository *git.Repository) error {
  111. couples.l = core.NewLogger()
  112. couples.people = make([]map[string]int, couples.PeopleNumber+1)
  113. for i := range couples.people {
  114. couples.people[i] = map[string]int{}
  115. }
  116. couples.peopleCommits = make([]int, couples.PeopleNumber+1)
  117. couples.files = map[string]map[string]int{}
  118. couples.renames = &[]rename{}
  119. couples.OneShotMergeProcessor.Initialize()
  120. return nil
  121. }
  122. // Consume runs this PipelineItem on the next commit data.
  123. // `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
  124. // Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
  125. // This function returns the mapping with analysis results. The keys must be the same as
  126. // in Provides(). If there was an error, nil is returned.
  127. func (couples *CouplesAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  128. firstMerge := couples.ShouldConsumeCommit(deps)
  129. mergeMode := deps[core.DependencyIsMerge].(bool)
  130. couples.lastCommit = deps[core.DependencyCommit].(*object.Commit)
  131. author := deps[identity.DependencyAuthor].(int)
  132. if author == identity.AuthorMissing {
  133. author = couples.PeopleNumber
  134. }
  135. if firstMerge {
  136. couples.peopleCommits[author]++
  137. }
  138. treeDiff := deps[items.DependencyTreeChanges].(object.Changes)
  139. context := make([]string, 0, len(treeDiff))
  140. for _, change := range treeDiff {
  141. action, err := change.Action()
  142. if err != nil {
  143. return nil, err
  144. }
  145. toName := change.To.Name
  146. fromName := change.From.Name
  147. switch action {
  148. case merkletrie.Insert:
  149. if !mergeMode || couples.files[toName] == nil {
  150. context = append(context, toName)
  151. couples.people[author][toName]++
  152. }
  153. case merkletrie.Delete:
  154. if !mergeMode {
  155. couples.people[author][fromName]++
  156. }
  157. case merkletrie.Modify:
  158. if fromName != toName {
  159. // renamed
  160. *couples.renames = append(
  161. *couples.renames, rename{ToName: toName, FromName: fromName})
  162. }
  163. if !mergeMode || couples.files[toName] == nil {
  164. context = append(context, toName)
  165. couples.people[author][toName]++
  166. }
  167. }
  168. }
  169. if len(context) <= CouplesMaximumMeaningfulContextSize {
  170. for _, file := range context {
  171. for _, otherFile := range context {
  172. lane, exists := couples.files[file]
  173. if !exists {
  174. lane = map[string]int{}
  175. couples.files[file] = lane
  176. }
  177. lane[otherFile]++
  178. }
  179. }
  180. }
  181. return nil, nil
  182. }
  183. // Finalize returns the result of the analysis. Further Consume() calls are not expected.
  184. func (couples *CouplesAnalysis) Finalize() interface{} {
  185. files, people := couples.propagateRenames(couples.currentFiles())
  186. filesSequence := make([]string, len(files))
  187. i := 0
  188. for file := range files {
  189. filesSequence[i] = file
  190. i++
  191. }
  192. sort.Strings(filesSequence)
  193. filesIndex := map[string]int{}
  194. for i, file := range filesSequence {
  195. filesIndex[file] = i
  196. }
  197. filesLines := make([]int, len(filesSequence))
  198. for i, name := range filesSequence {
  199. file, err := couples.lastCommit.File(name)
  200. if err != nil {
  201. err := fmt.Errorf("cannot find file %s in commit %s: %v",
  202. name, couples.lastCommit.Hash.String(), err)
  203. couples.l.Error(err)
  204. return err
  205. }
  206. blob := items.CachedBlob{Blob: file.Blob}
  207. err = blob.Cache()
  208. if err != nil {
  209. err := fmt.Errorf("cannot read blob %s of file %s: %v",
  210. blob.Hash.String(), name, err)
  211. couples.l.Error(err)
  212. return err
  213. }
  214. filesLines[i], _ = blob.CountLines()
  215. }
  216. peopleMatrix := make([]map[int]int64, couples.PeopleNumber+1)
  217. peopleFiles := make([][]int, couples.PeopleNumber+1)
  218. for i := range peopleMatrix {
  219. peopleMatrix[i] = map[int]int64{}
  220. for file, commits := range people[i] {
  221. if fi, exists := filesIndex[file]; exists {
  222. peopleFiles[i] = append(peopleFiles[i], fi)
  223. }
  224. for j, otherFiles := range people {
  225. otherCommits := otherFiles[file]
  226. delta := otherCommits
  227. if otherCommits > commits {
  228. delta = commits
  229. }
  230. if delta > 0 {
  231. peopleMatrix[i][j] += int64(delta)
  232. }
  233. }
  234. }
  235. sort.Ints(peopleFiles[i])
  236. }
  237. filesMatrix := make([]map[int]int64, len(filesIndex))
  238. for i := range filesMatrix {
  239. filesMatrix[i] = map[int]int64{}
  240. for otherFile, cooccs := range files[filesSequence[i]] {
  241. filesMatrix[i][filesIndex[otherFile]] = int64(cooccs)
  242. }
  243. }
  244. return CouplesResult{
  245. PeopleMatrix: peopleMatrix,
  246. PeopleFiles: peopleFiles,
  247. Files: filesSequence,
  248. FilesLines: filesLines,
  249. FilesMatrix: filesMatrix,
  250. reversedPeopleDict: couples.reversedPeopleDict,
  251. }
  252. }
  253. // Fork clones this pipeline item.
  254. func (couples *CouplesAnalysis) Fork(n int) []core.PipelineItem {
  255. return core.ForkCopyPipelineItem(couples, n)
  256. }
  257. // Serialize converts the analysis result as returned by Finalize() to text or bytes.
  258. // The text format is YAML and the bytes format is Protocol Buffers.
  259. func (couples *CouplesAnalysis) Serialize(result interface{}, binary bool, writer io.Writer) error {
  260. couplesResult := result.(CouplesResult)
  261. if binary {
  262. return couples.serializeBinary(&couplesResult, writer)
  263. }
  264. couples.serializeText(&couplesResult, writer)
  265. return nil
  266. }
  267. // Deserialize converts the specified protobuf bytes to CouplesResult.
  268. func (couples *CouplesAnalysis) Deserialize(pbmessage []byte) (interface{}, error) {
  269. message := pb.CouplesAnalysisResults{}
  270. err := proto.Unmarshal(pbmessage, &message)
  271. if err != nil {
  272. return nil, err
  273. }
  274. result := CouplesResult{
  275. Files: message.FileCouples.Index,
  276. FilesLines: make([]int, len(message.FileCouples.Index)),
  277. FilesMatrix: make([]map[int]int64, message.FileCouples.Matrix.NumberOfRows),
  278. PeopleFiles: make([][]int, len(message.PeopleCouples.Index)),
  279. PeopleMatrix: make([]map[int]int64, message.PeopleCouples.Matrix.NumberOfRows),
  280. reversedPeopleDict: message.PeopleCouples.Index,
  281. }
  282. for i, files := range message.PeopleFiles {
  283. result.PeopleFiles[i] = make([]int, len(files.Files))
  284. for j, val := range files.Files {
  285. result.PeopleFiles[i][j] = int(val)
  286. }
  287. }
  288. if len(message.FileCouples.Index) != len(message.FilesLines) {
  289. err := fmt.Errorf("Couples PB message integrity violation: file_couples (%d) != file_lines (%d)",
  290. len(message.FileCouples.Index), len(message.FilesLines))
  291. couples.l.Error(err)
  292. return nil, err
  293. }
  294. for i, v := range message.FilesLines {
  295. result.FilesLines[i] = int(v)
  296. }
  297. convertCSR := func(dest []map[int]int64, src *pb.CompressedSparseRowMatrix) {
  298. for indptr := range src.Indptr {
  299. if indptr == 0 {
  300. continue
  301. }
  302. dest[indptr-1] = map[int]int64{}
  303. for j := src.Indptr[indptr-1]; j < src.Indptr[indptr]; j++ {
  304. dest[indptr-1][int(src.Indices[j])] = src.Data[j]
  305. }
  306. }
  307. }
  308. convertCSR(result.FilesMatrix, message.FileCouples.Matrix)
  309. convertCSR(result.PeopleMatrix, message.PeopleCouples.Matrix)
  310. return result, nil
  311. }
  312. // MergeResults combines two CouplesAnalysis-s together.
  313. func (couples *CouplesAnalysis) MergeResults(r1, r2 interface{}, c1, c2 *core.CommonAnalysisResult) interface{} {
  314. cr1 := r1.(CouplesResult)
  315. cr2 := r2.(CouplesResult)
  316. merged := CouplesResult{}
  317. var people, files map[string]identity.MergedIndex
  318. people, merged.reversedPeopleDict = identity.MergeReversedDictsIdentities(
  319. cr1.reversedPeopleDict, cr2.reversedPeopleDict)
  320. files, merged.Files = identity.MergeReversedDictsLiteral(cr1.Files, cr2.Files)
  321. merged.FilesLines = make([]int, len(merged.Files))
  322. for i, name := range merged.Files {
  323. idxs := files[name]
  324. if idxs.First >= 0 {
  325. merged.FilesLines[i] += cr1.FilesLines[idxs.First]
  326. }
  327. if idxs.Second >= 0 {
  328. merged.FilesLines[i] += cr2.FilesLines[idxs.Second]
  329. }
  330. }
  331. merged.PeopleFiles = make([][]int, len(merged.reversedPeopleDict))
  332. peopleFilesDicts := make([]map[int]bool, len(merged.reversedPeopleDict))
  333. addPeopleFiles := func(peopleFiles [][]int, reversedPeopleDict []string,
  334. reversedFilesDict []string) {
  335. for pi, fs := range peopleFiles {
  336. idx := people[reversedPeopleDict[pi]].Final
  337. m := peopleFilesDicts[idx]
  338. if m == nil {
  339. m = map[int]bool{}
  340. peopleFilesDicts[idx] = m
  341. }
  342. for _, f := range fs {
  343. m[files[reversedFilesDict[f]].Final] = true
  344. }
  345. }
  346. }
  347. addPeopleFiles(cr1.PeopleFiles, cr1.reversedPeopleDict, cr1.Files)
  348. addPeopleFiles(cr2.PeopleFiles, cr2.reversedPeopleDict, cr2.Files)
  349. for i, m := range peopleFilesDicts {
  350. merged.PeopleFiles[i] = make([]int, len(m))
  351. j := 0
  352. for f := range m {
  353. merged.PeopleFiles[i][j] = f
  354. j++
  355. }
  356. sort.Ints(merged.PeopleFiles[i])
  357. }
  358. merged.PeopleMatrix = make([]map[int]int64, len(merged.reversedPeopleDict)+1)
  359. addPeople := func(peopleMatrix []map[int]int64, reversedPeopleDict []string) {
  360. for pi, pc := range peopleMatrix {
  361. var idx int
  362. if pi < len(reversedPeopleDict) {
  363. idx = people[reversedPeopleDict[pi]].Final
  364. } else {
  365. idx = len(merged.reversedPeopleDict)
  366. }
  367. m := merged.PeopleMatrix[idx]
  368. if m == nil {
  369. m = map[int]int64{}
  370. merged.PeopleMatrix[idx] = m
  371. }
  372. for otherDev, val := range pc {
  373. var otherIdx int
  374. if otherDev < len(reversedPeopleDict) {
  375. otherIdx = people[reversedPeopleDict[otherDev]].Final
  376. } else {
  377. otherIdx = len(merged.reversedPeopleDict)
  378. }
  379. m[otherIdx] += val
  380. }
  381. }
  382. }
  383. addPeople(cr1.PeopleMatrix, cr1.reversedPeopleDict)
  384. addPeople(cr2.PeopleMatrix, cr2.reversedPeopleDict)
  385. merged.FilesMatrix = make([]map[int]int64, len(merged.Files))
  386. addFiles := func(filesMatrix []map[int]int64, reversedFilesDict []string) {
  387. for fi, fc := range filesMatrix {
  388. idx := people[reversedFilesDict[fi]].Final
  389. m := merged.FilesMatrix[idx]
  390. if m == nil {
  391. m = map[int]int64{}
  392. merged.FilesMatrix[idx] = m
  393. }
  394. for file, val := range fc {
  395. m[files[reversedFilesDict[file]].Final] += val
  396. }
  397. }
  398. }
  399. addFiles(cr1.FilesMatrix, cr1.Files)
  400. addFiles(cr2.FilesMatrix, cr2.Files)
  401. return merged
  402. }
  403. func (couples *CouplesAnalysis) serializeText(result *CouplesResult, writer io.Writer) {
  404. fmt.Fprintln(writer, " files_coocc:")
  405. fmt.Fprintln(writer, " index:")
  406. for _, file := range result.Files {
  407. fmt.Fprintf(writer, " - %s\n", yaml.SafeString(file))
  408. }
  409. fmt.Fprintln(writer, " lines:")
  410. for _, l := range result.FilesLines {
  411. fmt.Fprintf(writer, " - %d\n", l)
  412. }
  413. fmt.Fprintln(writer, " matrix:")
  414. for _, files := range result.FilesMatrix {
  415. fmt.Fprint(writer, " - {")
  416. var indices []int
  417. for file := range files {
  418. indices = append(indices, file)
  419. }
  420. sort.Ints(indices)
  421. for i, file := range indices {
  422. fmt.Fprintf(writer, "%d: %d", file, files[file])
  423. if i < len(indices)-1 {
  424. fmt.Fprint(writer, ", ")
  425. }
  426. }
  427. fmt.Fprintln(writer, "}")
  428. }
  429. fmt.Fprintln(writer, " people_coocc:")
  430. fmt.Fprintln(writer, " index:")
  431. for _, person := range result.reversedPeopleDict {
  432. fmt.Fprintf(writer, " - %s\n", yaml.SafeString(person))
  433. }
  434. fmt.Fprintln(writer, " matrix:")
  435. for _, people := range result.PeopleMatrix {
  436. fmt.Fprint(writer, " - {")
  437. var indices []int
  438. for file := range people {
  439. indices = append(indices, file)
  440. }
  441. sort.Ints(indices)
  442. for i, person := range indices {
  443. fmt.Fprintf(writer, "%d: %d", person, people[person])
  444. if i < len(indices)-1 {
  445. fmt.Fprint(writer, ", ")
  446. }
  447. }
  448. fmt.Fprintln(writer, "}")
  449. }
  450. fmt.Fprintln(writer, " author_files:") // sorted by number of files each author changed
  451. peopleFiles := sortByNumberOfFiles(result.PeopleFiles, result.reversedPeopleDict, result.Files)
  452. for _, authorFiles := range peopleFiles {
  453. fmt.Fprintf(writer, " - %s:\n", yaml.SafeString(authorFiles.Author))
  454. sort.Strings(authorFiles.Files)
  455. for _, file := range authorFiles.Files {
  456. fmt.Fprintf(writer, " - %s\n", yaml.SafeString(file)) // sorted by path
  457. }
  458. }
  459. }
  // sortByNumberOfFiles resolves the per-person file indexes into file names and returns the
  // authors ordered by the number of files they changed, ascending.
  //
  // peopleFiles[i] holds the indexes into filesDict of the files changed by person i, whose
  // name is peopleDict[i]. Entries of peopleFiles without a name in peopleDict are skipped.
  // The sort is stable: authors with the same number of files keep their peopleFiles order,
  // so the output is reproducible.
  func sortByNumberOfFiles(
  	peopleFiles [][]int, peopleDict []string, filesDict []string) authorFilesList {
  	var pfl authorFilesList
  	for peopleIdx, files := range peopleFiles {
  		if peopleIdx >= len(peopleDict) {
  			// Everything from here on has no name.
  			break
  		}
  		fileNames := make([]string, len(files))
  		for i, fi := range files {
  			fileNames[i] = filesDict[fi]
  		}
  		pfl = append(pfl, authorFiles{Author: peopleDict[peopleIdx], Files: fileNames})
  	}
  	sort.Stable(pfl)
  	return pfl
  }

  // authorFiles pairs an author with the names of the files which that author changed.
  type authorFiles struct {
  	Author string
  	Files  []string
  }

  // authorFilesList implements sort.Interface, ordering by the number of files, ascending.
  type authorFilesList []authorFiles

  // Len returns the number of authors in the list.
  func (s authorFilesList) Len() int {
  	return len(s)
  }

  // Swap exchanges the authors at positions i and j.
  func (s authorFilesList) Swap(i, j int) {
  	s[i], s[j] = s[j], s[i]
  }

  // Less reports whether the author at position i changed fewer files than the one at j.
  func (s authorFilesList) Less(i, j int) bool {
  	return len(s[i].Files) < len(s[j].Files)
  }
  489. func (couples *CouplesAnalysis) serializeBinary(result *CouplesResult, writer io.Writer) error {
  490. message := pb.CouplesAnalysisResults{}
  491. message.FileCouples = &pb.Couples{
  492. Index: result.Files,
  493. Matrix: pb.MapToCompressedSparseRowMatrix(result.FilesMatrix),
  494. }
  495. message.PeopleCouples = &pb.Couples{
  496. Index: result.reversedPeopleDict,
  497. Matrix: pb.MapToCompressedSparseRowMatrix(result.PeopleMatrix),
  498. }
  499. message.PeopleFiles = make([]*pb.TouchedFiles, len(result.reversedPeopleDict))
  500. for key := range result.reversedPeopleDict {
  501. files := result.PeopleFiles[key]
  502. int32Files := make([]int32, len(files))
  503. for i, f := range files {
  504. int32Files[i] = int32(f)
  505. }
  506. message.PeopleFiles[key] = &pb.TouchedFiles{
  507. Files: int32Files,
  508. }
  509. }
  510. message.FilesLines = make([]int32, len(result.FilesLines))
  511. for i, l := range result.FilesLines {
  512. message.FilesLines[i] = int32(l)
  513. }
  514. serialized, err := proto.Marshal(&message)
  515. if err != nil {
  516. return err
  517. }
  518. _, err = writer.Write(serialized)
  519. return err
  520. }
  521. // currentFiles return the list of files in the last consumed commit.
  522. func (couples *CouplesAnalysis) currentFiles() map[string]bool {
  523. files := map[string]bool{}
  524. if couples.lastCommit == nil {
  525. for key := range couples.files {
  526. files[key] = true
  527. }
  528. }
  529. tree, _ := couples.lastCommit.Tree()
  530. fileIter := tree.Files()
  531. fileIter.ForEach(func(fobj *object.File) error {
  532. files[fobj.Name] = true
  533. return nil
  534. })
  535. return files
  536. }
  537. // propagateRenames applies `renames` over the files from `lastCommit`.
  538. func (couples *CouplesAnalysis) propagateRenames(files map[string]bool) (
  539. map[string]map[string]int, []map[string]int) {
  540. renames := *couples.renames
  541. reducedFiles := map[string]map[string]int{}
  542. for file := range files {
  543. fmap := map[string]int{}
  544. refmap := couples.files[file]
  545. for other := range files {
  546. refval := refmap[other]
  547. if refval > 0 {
  548. fmap[other] = refval
  549. }
  550. }
  551. if len(fmap) > 0 {
  552. reducedFiles[file] = fmap
  553. }
  554. }
  555. // propagate renames
  556. aliases := map[string]map[string]bool{}
  557. pointers := map[string]string{}
  558. for i := range renames {
  559. rename := renames[len(renames)-i-1]
  560. toName := rename.ToName
  561. if newTo, exists := pointers[toName]; exists {
  562. toName = newTo
  563. }
  564. if _, exists := reducedFiles[toName]; exists {
  565. if rename.FromName != toName {
  566. var set map[string]bool
  567. if set, exists = aliases[toName]; !exists {
  568. set = map[string]bool{}
  569. aliases[toName] = set
  570. }
  571. set[rename.FromName] = true
  572. pointers[rename.FromName] = toName
  573. }
  574. continue
  575. }
  576. }
  577. adjustments := map[string]map[string]int{}
  578. for final, set := range aliases {
  579. adjustment := map[string]int{}
  580. for alias := range set {
  581. for k, v := range couples.files[alias] {
  582. adjustment[k] += v
  583. }
  584. }
  585. adjustments[final] = adjustment
  586. }
  587. for _, adjustment := range adjustments {
  588. for final, set := range aliases {
  589. for alias := range set {
  590. adjustment[final] += adjustment[alias]
  591. delete(adjustment, alias)
  592. }
  593. }
  594. }
  595. for final, adjustment := range adjustments {
  596. for key, val := range adjustment {
  597. if coocc, exists := reducedFiles[final][key]; exists {
  598. reducedFiles[final][key] = coocc + val
  599. reducedFiles[key][final] = coocc + val
  600. }
  601. }
  602. }
  603. people := make([]map[string]int, len(couples.people))
  604. for i, counts := range couples.people {
  605. reducedCounts := map[string]int{}
  606. people[i] = reducedCounts
  607. for file := range files {
  608. count := counts[file]
  609. for alias := range aliases[file] {
  610. count += counts[alias]
  611. }
  612. if count > 0 {
  613. reducedCounts[file] = count
  614. }
  615. }
  616. for key, val := range counts {
  617. if _, exists := files[key]; !exists {
  618. if _, exists = pointers[key]; !exists {
  619. reducedCounts[key] = val
  620. }
  621. }
  622. }
  623. }
  624. return reducedFiles, people
  625. }
  626. func init() {
  627. core.Registry.Register(&CouplesAnalysis{})
  628. }