identity.go 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219
  1. package hercules
  2. import (
  3. "bufio"
  4. "os"
  5. "sort"
  6. "strings"
  7. "gopkg.in/src-d/go-git.v4"
  8. "gopkg.in/src-d/go-git.v4/plumbing/object"
  9. )
  10. type IdentityDetector struct {
  11. // Maps email || name -> developer id.
  12. PeopleDict map[string]int
  13. // Maps developer id -> description
  14. ReversedPeopleDict []string
  15. }
  16. const (
  17. MISSING_AUTHOR = (1 << 18) - 1
  18. SELF_AUTHOR = (1 << 18) - 2
  19. UNMATCHED_AUTHOR = "<unmatched>"
  20. FactIdentityDetectorPeopleDict = "IdentityDetector.PeopleDict"
  21. FactIdentityDetectorReversedPeopleDict = "IdentityDetector.ReversedPeopleDict"
  22. ConfigIdentityDetectorPeopleDictPath = "IdentityDetector.PeopleDictPath"
  23. FactIdentityDetectorPeopleCount = "IdentityDetector.PeopleCount"
  24. )
  25. func (id *IdentityDetector) Name() string {
  26. return "IdentityDetector"
  27. }
  28. func (id *IdentityDetector) Provides() []string {
  29. arr := [...]string{"author"}
  30. return arr[:]
  31. }
  32. func (id *IdentityDetector) Requires() []string {
  33. return []string{}
  34. }
  35. func (id *IdentityDetector) ListConfigurationOptions() []ConfigurationOption {
  36. options := [...]ConfigurationOption{{
  37. Name: ConfigIdentityDetectorPeopleDictPath,
  38. Description: "Path to the developers' email associations.",
  39. Flag: "people-dict",
  40. Type: StringConfigurationOption,
  41. Default: ""},
  42. }
  43. return options[:]
  44. }
  45. func (id *IdentityDetector) Configure(facts map[string]interface{}) {
  46. if val, exists := facts[FactIdentityDetectorPeopleDict].(map[string]int); exists {
  47. id.PeopleDict = val
  48. }
  49. if val, exists := facts[FactIdentityDetectorReversedPeopleDict].([]string); exists {
  50. id.ReversedPeopleDict = val
  51. }
  52. if id.PeopleDict == nil || id.ReversedPeopleDict == nil {
  53. peopleDictPath, _ := facts[ConfigIdentityDetectorPeopleDictPath].(string)
  54. if peopleDictPath != "" {
  55. id.LoadPeopleDict(peopleDictPath)
  56. facts[FactIdentityDetectorPeopleCount] = len(id.ReversedPeopleDict) - 1
  57. } else {
  58. if _, exists := facts[FactPipelineCommits]; !exists {
  59. panic("IdentityDetector needs a list of commits to initialize.")
  60. }
  61. id.GeneratePeopleDict(facts[FactPipelineCommits].([]*object.Commit))
  62. facts[FactIdentityDetectorPeopleCount] = len(id.ReversedPeopleDict)
  63. }
  64. } else {
  65. facts[FactIdentityDetectorPeopleCount] = len(id.ReversedPeopleDict)
  66. }
  67. facts[FactIdentityDetectorPeopleDict] = id.PeopleDict
  68. facts[FactIdentityDetectorReversedPeopleDict] = id.ReversedPeopleDict
  69. }
  70. func (id *IdentityDetector) Initialize(repository *git.Repository) {
  71. }
  72. func (self *IdentityDetector) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  73. commit := deps["commit"].(*object.Commit)
  74. signature := commit.Author
  75. id, exists := self.PeopleDict[strings.ToLower(signature.Email)]
  76. if !exists {
  77. id, exists = self.PeopleDict[strings.ToLower(signature.Name)]
  78. if !exists {
  79. id = MISSING_AUTHOR
  80. }
  81. }
  82. return map[string]interface{}{"author": id}, nil
  83. }
  84. func (id *IdentityDetector) LoadPeopleDict(path string) error {
  85. file, err := os.Open(path)
  86. if err != nil {
  87. return err
  88. }
  89. defer file.Close()
  90. scanner := bufio.NewScanner(file)
  91. dict := make(map[string]int)
  92. reverse_dict := []string{}
  93. size := 0
  94. for scanner.Scan() {
  95. ids := strings.Split(scanner.Text(), "|")
  96. for _, id := range ids {
  97. dict[strings.ToLower(id)] = size
  98. }
  99. reverse_dict = append(reverse_dict, ids[0])
  100. size += 1
  101. }
  102. reverse_dict = append(reverse_dict, UNMATCHED_AUTHOR)
  103. id.PeopleDict = dict
  104. id.ReversedPeopleDict = reverse_dict
  105. return nil
  106. }
  107. func (id *IdentityDetector) GeneratePeopleDict(commits []*object.Commit) {
  108. dict := map[string]int{}
  109. emails := map[int][]string{}
  110. names := map[int][]string{}
  111. size := 0
  112. mailmapFile, err := commits[len(commits)-1].File(".mailmap")
  113. if err == nil {
  114. mailMapContents, err := mailmapFile.Contents()
  115. if err == nil {
  116. mailmap := ParseMailmap(mailMapContents)
  117. for key, val := range mailmap {
  118. key = strings.ToLower(key)
  119. toEmail := strings.ToLower(val.Email)
  120. toName := strings.ToLower(val.Name)
  121. id, exists := dict[toEmail]
  122. if !exists {
  123. id, exists = dict[toName]
  124. }
  125. if exists {
  126. dict[key] = id
  127. } else {
  128. id = size
  129. size++
  130. if toEmail != "" {
  131. dict[toEmail] = id
  132. emails[id] = append(emails[id], toEmail)
  133. }
  134. if toName != "" {
  135. dict[toName] = id
  136. names[id] = append(names[id], toName)
  137. }
  138. dict[key] = id
  139. }
  140. if strings.Contains(key, "@") {
  141. exists := false
  142. for _, val := range emails[id] {
  143. if key == val {
  144. exists = true
  145. break
  146. }
  147. }
  148. if !exists {
  149. emails[id] = append(emails[id], key)
  150. }
  151. } else {
  152. exists := false
  153. for _, val := range names[id] {
  154. if key == val {
  155. exists = true
  156. break
  157. }
  158. }
  159. if !exists {
  160. names[id] = append(names[id], key)
  161. }
  162. }
  163. }
  164. }
  165. }
  166. for _, commit := range commits {
  167. email := strings.ToLower(commit.Author.Email)
  168. name := strings.ToLower(commit.Author.Name)
  169. id, exists := dict[email]
  170. if exists {
  171. _, exists := dict[name]
  172. if !exists {
  173. dict[name] = id
  174. names[id] = append(names[id], name)
  175. }
  176. continue
  177. }
  178. id, exists = dict[name]
  179. if exists {
  180. dict[email] = id
  181. emails[id] = append(emails[id], email)
  182. continue
  183. }
  184. dict[email] = size
  185. dict[name] = size
  186. emails[size] = append(emails[size], email)
  187. names[size] = append(names[size], name)
  188. size += 1
  189. }
  190. reverse_dict := make([]string, size)
  191. for _, val := range dict {
  192. sort.Strings(names[val])
  193. sort.Strings(emails[val])
  194. reverse_dict[val] = strings.Join(names[val], "|") + "|" + strings.Join(emails[val], "|")
  195. }
  196. id.PeopleDict = dict
  197. id.ReversedPeopleDict = reverse_dict
  198. }
  199. func init() {
  200. Registry.Register(&IdentityDetector{})
  201. }