identity.go 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  1. package hercules
  2. import (
  3. "bufio"
  4. "os"
  5. "sort"
  6. "strings"
  7. "gopkg.in/src-d/go-git.v4"
  8. "gopkg.in/src-d/go-git.v4/plumbing/object"
  9. )
  // IdentityDetector resolves commit authors to integer developer ids.
  // Consume looks the author up in PeopleDict, first by lowercased email and
  // then by lowercased name.
  type IdentityDetector struct {
  	// PeopleDict maps a lowercased email or name to a developer id.
  	PeopleDict map[string]int

  	// ReversedPeopleDict maps a developer id (used as the slice index) to
  	// the textual description of that developer.
  	ReversedPeopleDict []string
  }
  // Reserved developer ids, taken from the top of the 18-bit range.
  const (
  	// MISSING_AUTHOR is the id Consume reports when neither the email nor
  	// the name of a commit author is present in PeopleDict.
  	MISSING_AUTHOR = 1<<18 - 1
  	// SELF_AUTHOR is the reserved id directly below MISSING_AUTHOR.
  	// NOTE(review): it is not referenced in this file; confirm its meaning
  	// against the code that uses it.
  	SELF_AUTHOR = 1<<18 - 2
  )

  // Keys used by IdentityDetector in the facts map and in its configuration.
  const (
  	// FactIdentityDetectorPeopleDict is the facts key under which Configure
  	// reads and publishes PeopleDict.
  	FactIdentityDetectorPeopleDict = "IdentityDetector.PeopleDict"
  	// FactIdentityDetectorReversedPeopleDict is the facts key under which
  	// Configure reads and publishes ReversedPeopleDict.
  	FactIdentityDetectorReversedPeopleDict = "IdentityDetector.ReversedPeopleDict"
  	// ConfigIdentityDetectorPeopleDictPath is the name of the option holding
  	// the path to the people dictionary file.
  	ConfigIdentityDetectorPeopleDictPath = "IdentityDetector.PeopleDictPath"
  	// FactIdentityDetectorPeopleCount is the facts key under which Configure
  	// publishes the number of developers.
  	FactIdentityDetectorPeopleCount = "IdentityDetector.PeopleCount"
  )
  24. func (id *IdentityDetector) Name() string {
  25. return "IdentityDetector"
  26. }
  27. func (id *IdentityDetector) Provides() []string {
  28. arr := [...]string{"author"}
  29. return arr[:]
  30. }
  31. func (id *IdentityDetector) Requires() []string {
  32. return []string{}
  33. }
  34. func (id *IdentityDetector) ListConfigurationOptions() []ConfigurationOption {
  35. options := [...]ConfigurationOption{{
  36. Name: ConfigIdentityDetectorPeopleDictPath,
  37. Description: "Path to the developers' email associations.",
  38. Flag: "people-dict",
  39. Type: StringConfigurationOption,
  40. Default: ""},
  41. }
  42. return options[:]
  43. }
  44. func (id *IdentityDetector) Configure(facts map[string]interface{}) {
  45. if val, exists := facts[FactIdentityDetectorPeopleDict].(map[string]int); exists {
  46. id.PeopleDict = val
  47. }
  48. if val, exists := facts[FactIdentityDetectorReversedPeopleDict].([]string); exists {
  49. id.ReversedPeopleDict = val
  50. }
  51. if id.PeopleDict == nil {
  52. if id.ReversedPeopleDict != nil {
  53. panic("IdentityDetector: ReversedPeopleDict != nil while PeopleDict == nil")
  54. }
  55. peopleDictPath, _ := facts[ConfigIdentityDetectorPeopleDictPath].(string)
  56. if peopleDictPath != "" {
  57. id.LoadPeopleDict(peopleDictPath)
  58. facts[FactIdentityDetectorPeopleCount] = len(id.ReversedPeopleDict) - 1
  59. } else {
  60. id.GeneratePeopleDict(facts["commits"].([]*object.Commit))
  61. facts[FactIdentityDetectorPeopleCount] = len(id.ReversedPeopleDict)
  62. }
  63. } else {
  64. facts[FactIdentityDetectorPeopleCount] = len(id.ReversedPeopleDict)
  65. }
  66. facts[FactIdentityDetectorPeopleDict] = id.PeopleDict
  67. facts[FactIdentityDetectorReversedPeopleDict] = id.ReversedPeopleDict
  68. }
  69. func (id *IdentityDetector) Initialize(repository *git.Repository) {
  70. }
  71. func (self *IdentityDetector) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  72. commit := deps["commit"].(*object.Commit)
  73. signature := commit.Author
  74. id, exists := self.PeopleDict[strings.ToLower(signature.Email)]
  75. if !exists {
  76. id, exists = self.PeopleDict[strings.ToLower(signature.Name)]
  77. if !exists {
  78. id = MISSING_AUTHOR
  79. }
  80. }
  81. return map[string]interface{}{"author": id}, nil
  82. }
  83. func (id *IdentityDetector) LoadPeopleDict(path string) error {
  84. file, err := os.Open(path)
  85. if err != nil {
  86. return err
  87. }
  88. defer file.Close()
  89. scanner := bufio.NewScanner(file)
  90. dict := make(map[string]int)
  91. reverse_dict := []string{}
  92. size := 0
  93. for scanner.Scan() {
  94. ids := strings.Split(scanner.Text(), "|")
  95. for _, id := range ids {
  96. dict[strings.ToLower(id)] = size
  97. }
  98. reverse_dict = append(reverse_dict, ids[0])
  99. size += 1
  100. }
  101. reverse_dict = append(reverse_dict, "<unmatched>")
  102. id.PeopleDict = dict
  103. id.ReversedPeopleDict = reverse_dict
  104. return nil
  105. }
  106. func (id *IdentityDetector) GeneratePeopleDict(commits []*object.Commit) {
  107. dict := map[string]int{}
  108. emails := map[int][]string{}
  109. names := map[int][]string{}
  110. size := 0
  111. mailmapFile, err := commits[len(commits)-1].File(".mailmap")
  112. if err == nil {
  113. mailMapContents, err := mailmapFile.Contents()
  114. if err == nil {
  115. mailmap := ParseMailmap(mailMapContents)
  116. for key, val := range mailmap {
  117. key = strings.ToLower(key)
  118. toEmail := strings.ToLower(val.Email)
  119. toName := strings.ToLower(val.Name)
  120. id, exists := dict[toEmail]
  121. if !exists {
  122. id, exists = dict[toName]
  123. }
  124. if exists {
  125. dict[key] = id
  126. } else {
  127. id = size
  128. size++
  129. if toEmail != "" {
  130. dict[toEmail] = id
  131. emails[id] = append(emails[id], toEmail)
  132. }
  133. if toName != "" {
  134. dict[toName] = id
  135. names[id] = append(names[id], toName)
  136. }
  137. dict[key] = id
  138. }
  139. if strings.Contains(key, "@") {
  140. exists := false
  141. for _, val := range emails[id] {
  142. if key == val {
  143. exists = true
  144. break
  145. }
  146. }
  147. if !exists {
  148. emails[id] = append(emails[id], key)
  149. }
  150. } else {
  151. exists := false
  152. for _, val := range names[id] {
  153. if key == val {
  154. exists = true
  155. break
  156. }
  157. }
  158. if !exists {
  159. names[id] = append(names[id], key)
  160. }
  161. }
  162. }
  163. }
  164. }
  165. for _, commit := range commits {
  166. email := strings.ToLower(commit.Author.Email)
  167. name := strings.ToLower(commit.Author.Name)
  168. id, exists := dict[email]
  169. if exists {
  170. _, exists := dict[name]
  171. if !exists {
  172. dict[name] = id
  173. names[id] = append(names[id], name)
  174. }
  175. continue
  176. }
  177. id, exists = dict[name]
  178. if exists {
  179. dict[email] = id
  180. emails[id] = append(emails[id], email)
  181. continue
  182. }
  183. dict[email] = size
  184. dict[name] = size
  185. emails[size] = append(emails[size], email)
  186. names[size] = append(names[size], name)
  187. size += 1
  188. }
  189. reverse_dict := make([]string, size)
  190. for _, val := range dict {
  191. sort.Strings(names[val])
  192. sort.Strings(emails[val])
  193. reverse_dict[val] = strings.Join(names[val], "|") + "|" + strings.Join(emails[val], "|")
  194. }
  195. id.PeopleDict = dict
  196. id.ReversedPeopleDict = reverse_dict
  197. }
  198. func init() {
  199. Registry.Register(&IdentityDetector{})
  200. }