identity.go 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
  1. package hercules
  2. import (
  3. "bufio"
  4. "os"
  5. "sort"
  6. "strings"
  7. "gopkg.in/src-d/go-git.v4"
  8. "gopkg.in/src-d/go-git.v4/plumbing/object"
  9. )
  10. type IdentityDetector struct {
  11. // Maps email || name -> developer id.
  12. PeopleDict map[string]int
  13. // Maps developer id -> description
  14. ReversePeopleDict []string
  15. }
  16. const MISSING_AUTHOR = (1 << 18) - 1
  17. const SELF_AUTHOR = (1 << 18) - 2
  18. func (id *IdentityDetector) Name() string {
  19. return "IdentityDetector"
  20. }
  21. func (id *IdentityDetector) Provides() []string {
  22. arr := [...]string{"author"}
  23. return arr[:]
  24. }
  25. func (id *IdentityDetector) Requires() []string {
  26. return []string{}
  27. }
  28. func (id *IdentityDetector) Initialize(repository *git.Repository) {
  29. }
  30. func (self *IdentityDetector) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  31. commit := deps["commit"].(*object.Commit)
  32. signature := commit.Author
  33. id, exists := self.PeopleDict[strings.ToLower(signature.Email)]
  34. if !exists {
  35. id, exists = self.PeopleDict[strings.ToLower(signature.Name)]
  36. if !exists {
  37. id = MISSING_AUTHOR
  38. }
  39. }
  40. return map[string]interface{}{"author": id}, nil
  41. }
  42. func (id *IdentityDetector) Finalize() interface{} {
  43. return nil
  44. }
  45. func (id *IdentityDetector) LoadPeopleDict(path string) error {
  46. file, err := os.Open(path)
  47. if err != nil {
  48. return err
  49. }
  50. defer file.Close()
  51. scanner := bufio.NewScanner(file)
  52. dict := make(map[string]int)
  53. reverse_dict := []string{}
  54. size := 0
  55. for scanner.Scan() {
  56. ids := strings.Split(scanner.Text(), "|")
  57. for _, id := range ids {
  58. dict[strings.ToLower(id)] = size
  59. }
  60. reverse_dict = append(reverse_dict, ids[0])
  61. size += 1
  62. }
  63. reverse_dict = append(reverse_dict, "<unmatched>")
  64. id.PeopleDict = dict
  65. id.ReversePeopleDict = reverse_dict
  66. return nil
  67. }
  68. func (id *IdentityDetector) GeneratePeopleDict(commits []*object.Commit) {
  69. dict := map[string]int{}
  70. emails := map[int][]string{}
  71. names := map[int][]string{}
  72. size := 0
  73. mailmapFile, err := commits[len(commits)-1].File(".mailmap")
  74. if err == nil {
  75. mailMapContents, err := mailmapFile.Contents()
  76. if err == nil {
  77. mailmap := ParseMailmap(mailMapContents)
  78. for key, val := range mailmap {
  79. key = strings.ToLower(key)
  80. toEmail := strings.ToLower(val.Email)
  81. toName := strings.ToLower(val.Name)
  82. id, exists := dict[toEmail]
  83. if !exists {
  84. id, exists = dict[toName]
  85. }
  86. if exists {
  87. dict[key] = id
  88. } else {
  89. id = size
  90. size++
  91. if toEmail != "" {
  92. dict[toEmail] = id
  93. emails[id] = append(emails[id], toEmail)
  94. }
  95. if toName != "" {
  96. dict[toName] = id
  97. names[id] = append(names[id], toName)
  98. }
  99. dict[key] = id
  100. }
  101. if strings.Contains(key, "@") {
  102. exists := false
  103. for _, val := range emails[id] {
  104. if key == val {
  105. exists = true
  106. break
  107. }
  108. }
  109. if !exists {
  110. emails[id] = append(emails[id], key)
  111. }
  112. } else {
  113. exists := false
  114. for _, val := range names[id] {
  115. if key == val {
  116. exists = true
  117. break
  118. }
  119. }
  120. if !exists {
  121. names[id] = append(names[id], key)
  122. }
  123. }
  124. }
  125. }
  126. }
  127. for _, commit := range commits {
  128. email := strings.ToLower(commit.Author.Email)
  129. name := strings.ToLower(commit.Author.Name)
  130. id, exists := dict[email]
  131. if exists {
  132. _, exists := dict[name]
  133. if !exists {
  134. dict[name] = id
  135. names[id] = append(names[id], name)
  136. }
  137. continue
  138. }
  139. id, exists = dict[name]
  140. if exists {
  141. dict[email] = id
  142. emails[id] = append(emails[id], email)
  143. continue
  144. }
  145. dict[email] = size
  146. dict[name] = size
  147. emails[size] = append(emails[size], email)
  148. names[size] = append(names[size], name)
  149. size += 1
  150. }
  151. reverse_dict := make([]string, size)
  152. for _, val := range dict {
  153. sort.Strings(names[val])
  154. sort.Strings(emails[val])
  155. reverse_dict[val] = strings.Join(names[val], "|") + "|" + strings.Join(emails[val], "|")
  156. }
  157. id.PeopleDict = dict
  158. id.ReversePeopleDict = reverse_dict
  159. }