identity.go 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216
  1. package hercules
  2. import (
  3. "bufio"
  4. "os"
  5. "sort"
  6. "strings"
  7. "gopkg.in/src-d/go-git.v4"
  8. "gopkg.in/src-d/go-git.v4/plumbing/object"
  9. )
  // IdentityDetector resolves commit authors to integer developer ids using
  // a dictionary of known emails and names.
  type IdentityDetector struct {
  	// PeopleDict maps a lowercased email or name to the developer id.
  	PeopleDict map[string]int

  	// ReversedPeopleDict maps a developer id, used as the slice index,
  	// to the textual description of that developer.
  	ReversedPeopleDict []string
  }
  const (
  	// MISSING_AUTHOR is the developer id reported by Consume when neither
  	// the email nor the name of a commit author is in the dictionary.
  	MISSING_AUTHOR = 1<<18 - 1
  	// SELF_AUTHOR is a reserved developer id one below MISSING_AUTHOR.
  	// NOTE(review): it is not referenced in this file - confirm its meaning
  	// against the code that uses it.
  	SELF_AUTHOR = 1<<18 - 2
  	// UNMATCHED_AUTHOR is the description LoadPeopleDict appends as the
  	// last entry of the reversed dictionary.
  	UNMATCHED_AUTHOR = "<unmatched>"

  	// ConfigIdentityDetectorPeopleDictPath is the name of the configuration
  	// option which holds the path to the people dictionary file.
  	ConfigIdentityDetectorPeopleDictPath = "IdentityDetector.PeopleDictPath"

  	// FactIdentityDetectorPeopleDict is the facts key of IdentityDetector.PeopleDict.
  	FactIdentityDetectorPeopleDict = "IdentityDetector.PeopleDict"
  	// FactIdentityDetectorReversedPeopleDict is the facts key of
  	// IdentityDetector.ReversedPeopleDict.
  	FactIdentityDetectorReversedPeopleDict = "IdentityDetector.ReversedPeopleDict"
  	// FactIdentityDetectorPeopleCount is the facts key under which Configure
  	// stores the number of developers.
  	FactIdentityDetectorPeopleCount = "IdentityDetector.PeopleCount"
  )
  25. func (id *IdentityDetector) Name() string {
  26. return "IdentityDetector"
  27. }
  28. func (id *IdentityDetector) Provides() []string {
  29. arr := [...]string{"author"}
  30. return arr[:]
  31. }
  32. func (id *IdentityDetector) Requires() []string {
  33. return []string{}
  34. }
  35. func (id *IdentityDetector) ListConfigurationOptions() []ConfigurationOption {
  36. options := [...]ConfigurationOption{{
  37. Name: ConfigIdentityDetectorPeopleDictPath,
  38. Description: "Path to the developers' email associations.",
  39. Flag: "people-dict",
  40. Type: StringConfigurationOption,
  41. Default: ""},
  42. }
  43. return options[:]
  44. }
  45. func (id *IdentityDetector) Configure(facts map[string]interface{}) {
  46. if val, exists := facts[FactIdentityDetectorPeopleDict].(map[string]int); exists {
  47. id.PeopleDict = val
  48. }
  49. if val, exists := facts[FactIdentityDetectorReversedPeopleDict].([]string); exists {
  50. id.ReversedPeopleDict = val
  51. }
  52. if id.PeopleDict == nil || id.ReversedPeopleDict == nil {
  53. peopleDictPath, _ := facts[ConfigIdentityDetectorPeopleDictPath].(string)
  54. if peopleDictPath != "" {
  55. id.LoadPeopleDict(peopleDictPath)
  56. facts[FactIdentityDetectorPeopleCount] = len(id.ReversedPeopleDict) - 1
  57. } else {
  58. id.GeneratePeopleDict(facts["commits"].([]*object.Commit))
  59. facts[FactIdentityDetectorPeopleCount] = len(id.ReversedPeopleDict)
  60. }
  61. } else {
  62. facts[FactIdentityDetectorPeopleCount] = len(id.ReversedPeopleDict)
  63. }
  64. facts[FactIdentityDetectorPeopleDict] = id.PeopleDict
  65. facts[FactIdentityDetectorReversedPeopleDict] = id.ReversedPeopleDict
  66. }
  67. func (id *IdentityDetector) Initialize(repository *git.Repository) {
  68. }
  69. func (self *IdentityDetector) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  70. commit := deps["commit"].(*object.Commit)
  71. signature := commit.Author
  72. id, exists := self.PeopleDict[strings.ToLower(signature.Email)]
  73. if !exists {
  74. id, exists = self.PeopleDict[strings.ToLower(signature.Name)]
  75. if !exists {
  76. id = MISSING_AUTHOR
  77. }
  78. }
  79. return map[string]interface{}{"author": id}, nil
  80. }
  81. func (id *IdentityDetector) LoadPeopleDict(path string) error {
  82. file, err := os.Open(path)
  83. if err != nil {
  84. return err
  85. }
  86. defer file.Close()
  87. scanner := bufio.NewScanner(file)
  88. dict := make(map[string]int)
  89. reverse_dict := []string{}
  90. size := 0
  91. for scanner.Scan() {
  92. ids := strings.Split(scanner.Text(), "|")
  93. for _, id := range ids {
  94. dict[strings.ToLower(id)] = size
  95. }
  96. reverse_dict = append(reverse_dict, ids[0])
  97. size += 1
  98. }
  99. reverse_dict = append(reverse_dict, UNMATCHED_AUTHOR)
  100. id.PeopleDict = dict
  101. id.ReversedPeopleDict = reverse_dict
  102. return nil
  103. }
  104. func (id *IdentityDetector) GeneratePeopleDict(commits []*object.Commit) {
  105. dict := map[string]int{}
  106. emails := map[int][]string{}
  107. names := map[int][]string{}
  108. size := 0
  109. mailmapFile, err := commits[len(commits)-1].File(".mailmap")
  110. if err == nil {
  111. mailMapContents, err := mailmapFile.Contents()
  112. if err == nil {
  113. mailmap := ParseMailmap(mailMapContents)
  114. for key, val := range mailmap {
  115. key = strings.ToLower(key)
  116. toEmail := strings.ToLower(val.Email)
  117. toName := strings.ToLower(val.Name)
  118. id, exists := dict[toEmail]
  119. if !exists {
  120. id, exists = dict[toName]
  121. }
  122. if exists {
  123. dict[key] = id
  124. } else {
  125. id = size
  126. size++
  127. if toEmail != "" {
  128. dict[toEmail] = id
  129. emails[id] = append(emails[id], toEmail)
  130. }
  131. if toName != "" {
  132. dict[toName] = id
  133. names[id] = append(names[id], toName)
  134. }
  135. dict[key] = id
  136. }
  137. if strings.Contains(key, "@") {
  138. exists := false
  139. for _, val := range emails[id] {
  140. if key == val {
  141. exists = true
  142. break
  143. }
  144. }
  145. if !exists {
  146. emails[id] = append(emails[id], key)
  147. }
  148. } else {
  149. exists := false
  150. for _, val := range names[id] {
  151. if key == val {
  152. exists = true
  153. break
  154. }
  155. }
  156. if !exists {
  157. names[id] = append(names[id], key)
  158. }
  159. }
  160. }
  161. }
  162. }
  163. for _, commit := range commits {
  164. email := strings.ToLower(commit.Author.Email)
  165. name := strings.ToLower(commit.Author.Name)
  166. id, exists := dict[email]
  167. if exists {
  168. _, exists := dict[name]
  169. if !exists {
  170. dict[name] = id
  171. names[id] = append(names[id], name)
  172. }
  173. continue
  174. }
  175. id, exists = dict[name]
  176. if exists {
  177. dict[email] = id
  178. emails[id] = append(emails[id], email)
  179. continue
  180. }
  181. dict[email] = size
  182. dict[name] = size
  183. emails[size] = append(emails[size], email)
  184. names[size] = append(names[size], name)
  185. size += 1
  186. }
  187. reverse_dict := make([]string, size)
  188. for _, val := range dict {
  189. sort.Strings(names[val])
  190. sort.Strings(emails[val])
  191. reverse_dict[val] = strings.Join(names[val], "|") + "|" + strings.Join(emails[val], "|")
  192. }
  193. id.PeopleDict = dict
  194. id.ReversedPeopleDict = reverse_dict
  195. }
  196. func init() {
  197. Registry.Register(&IdentityDetector{})
  198. }