identity.go 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. package hercules
  2. import (
  3. "bufio"
  4. "os"
  5. "sort"
  6. "strings"
  7. "gopkg.in/src-d/go-git.v4"
  8. "gopkg.in/src-d/go-git.v4/plumbing/object"
  9. )
  // IdentityDetector resolves commit authors to integer developer ids.
  type IdentityDetector struct {
  	// PeopleDict is keyed by a lowercased email or name and yields the
  	// developer id.
  	PeopleDict map[string]int
  	// ReversedPeopleDict is indexed by developer id and yields that
  	// developer's description.
  	ReversedPeopleDict []string
  }
  // Reserved author ids, placed directly below 1<<18.
  const (
  	// MISSING_AUTHOR is the id reported for a commit author that matches
  	// no entry in the people dictionary.
  	MISSING_AUTHOR = 1<<18 - 1
  	// SELF_AUTHOR is the reserved id immediately below MISSING_AUTHOR.
  	// NOTE(review): its use is not visible in this file.
  	SELF_AUTHOR = MISSING_AUTHOR - 1
  )
  18. func (id *IdentityDetector) Name() string {
  19. return "IdentityDetector"
  20. }
  21. func (id *IdentityDetector) Provides() []string {
  22. arr := [...]string{"author"}
  23. return arr[:]
  24. }
  25. func (id *IdentityDetector) Requires() []string {
  26. return []string{}
  27. }
  28. func (id *IdentityDetector) Construct(facts map[string]interface{}) {
  29. if val, exists := facts["IdentityDetector.PeopleDict"].(map[string]int); exists {
  30. id.PeopleDict = val
  31. }
  32. if val, exists := facts["IdentityDetector.ReversedPeopleDict"].([]string); exists {
  33. id.ReversedPeopleDict = val
  34. }
  35. if id.PeopleDict == nil {
  36. if id.ReversedPeopleDict != nil {
  37. panic("IdentityDetector: ReversedPeopleDict != nil while PeopleDict == nil")
  38. }
  39. peopleDictPath := facts["IdentityDetector.PeopleDictPath"].(string)
  40. if peopleDictPath != "" {
  41. id.LoadPeopleDict(peopleDictPath)
  42. facts["PeopleCount"] = len(id.ReversedPeopleDict) - 1
  43. } else {
  44. id.GeneratePeopleDict(facts["commits"].([]*object.Commit))
  45. facts["PeopleCount"] = len(id.ReversedPeopleDict)
  46. }
  47. } else {
  48. facts["PeopleCount"] = len(id.ReversedPeopleDict)
  49. }
  50. }
  51. func (id *IdentityDetector) Initialize(repository *git.Repository) {
  52. }
  53. func (self *IdentityDetector) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  54. commit := deps["commit"].(*object.Commit)
  55. signature := commit.Author
  56. id, exists := self.PeopleDict[strings.ToLower(signature.Email)]
  57. if !exists {
  58. id, exists = self.PeopleDict[strings.ToLower(signature.Name)]
  59. if !exists {
  60. id = MISSING_AUTHOR
  61. }
  62. }
  63. return map[string]interface{}{"author": id}, nil
  64. }
  65. func (id *IdentityDetector) Finalize() interface{} {
  66. return nil
  67. }
  68. func (id *IdentityDetector) LoadPeopleDict(path string) error {
  69. file, err := os.Open(path)
  70. if err != nil {
  71. return err
  72. }
  73. defer file.Close()
  74. scanner := bufio.NewScanner(file)
  75. dict := make(map[string]int)
  76. reverse_dict := []string{}
  77. size := 0
  78. for scanner.Scan() {
  79. ids := strings.Split(scanner.Text(), "|")
  80. for _, id := range ids {
  81. dict[strings.ToLower(id)] = size
  82. }
  83. reverse_dict = append(reverse_dict, ids[0])
  84. size += 1
  85. }
  86. reverse_dict = append(reverse_dict, "<unmatched>")
  87. id.PeopleDict = dict
  88. id.ReversedPeopleDict = reverse_dict
  89. return nil
  90. }
  91. func (id *IdentityDetector) GeneratePeopleDict(commits []*object.Commit) {
  92. dict := map[string]int{}
  93. emails := map[int][]string{}
  94. names := map[int][]string{}
  95. size := 0
  96. mailmapFile, err := commits[len(commits)-1].File(".mailmap")
  97. if err == nil {
  98. mailMapContents, err := mailmapFile.Contents()
  99. if err == nil {
  100. mailmap := ParseMailmap(mailMapContents)
  101. for key, val := range mailmap {
  102. key = strings.ToLower(key)
  103. toEmail := strings.ToLower(val.Email)
  104. toName := strings.ToLower(val.Name)
  105. id, exists := dict[toEmail]
  106. if !exists {
  107. id, exists = dict[toName]
  108. }
  109. if exists {
  110. dict[key] = id
  111. } else {
  112. id = size
  113. size++
  114. if toEmail != "" {
  115. dict[toEmail] = id
  116. emails[id] = append(emails[id], toEmail)
  117. }
  118. if toName != "" {
  119. dict[toName] = id
  120. names[id] = append(names[id], toName)
  121. }
  122. dict[key] = id
  123. }
  124. if strings.Contains(key, "@") {
  125. exists := false
  126. for _, val := range emails[id] {
  127. if key == val {
  128. exists = true
  129. break
  130. }
  131. }
  132. if !exists {
  133. emails[id] = append(emails[id], key)
  134. }
  135. } else {
  136. exists := false
  137. for _, val := range names[id] {
  138. if key == val {
  139. exists = true
  140. break
  141. }
  142. }
  143. if !exists {
  144. names[id] = append(names[id], key)
  145. }
  146. }
  147. }
  148. }
  149. }
  150. for _, commit := range commits {
  151. email := strings.ToLower(commit.Author.Email)
  152. name := strings.ToLower(commit.Author.Name)
  153. id, exists := dict[email]
  154. if exists {
  155. _, exists := dict[name]
  156. if !exists {
  157. dict[name] = id
  158. names[id] = append(names[id], name)
  159. }
  160. continue
  161. }
  162. id, exists = dict[name]
  163. if exists {
  164. dict[email] = id
  165. emails[id] = append(emails[id], email)
  166. continue
  167. }
  168. dict[email] = size
  169. dict[name] = size
  170. emails[size] = append(emails[size], email)
  171. names[size] = append(names[size], name)
  172. size += 1
  173. }
  174. reverse_dict := make([]string, size)
  175. for _, val := range dict {
  176. sort.Strings(names[val])
  177. sort.Strings(emails[val])
  178. reverse_dict[val] = strings.Join(names[val], "|") + "|" + strings.Join(emails[val], "|")
  179. }
  180. id.PeopleDict = dict
  181. id.ReversedPeopleDict = reverse_dict
  182. }
  183. func init() {
  184. Registry.Register(&IdentityDetector{})
  185. }