identity.go 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247
  1. package hercules
  2. import (
  3. "bufio"
  4. "os"
  5. "sort"
  6. "strings"
  7. "gopkg.in/src-d/go-git.v4"
  8. "gopkg.in/src-d/go-git.v4/plumbing/object"
  9. )
  10. type IdentityDetector struct {
  11. // Maps email || name -> developer id.
  12. PeopleDict map[string]int
  13. // Maps developer id -> description
  14. ReversedPeopleDict []string
  15. }
  16. const (
  17. MISSING_AUTHOR = (1 << 18) - 1
  18. SELF_AUTHOR = (1 << 18) - 2
  19. UNMATCHED_AUTHOR = "<unmatched>"
  20. FactIdentityDetectorPeopleDict = "IdentityDetector.PeopleDict"
  21. FactIdentityDetectorReversedPeopleDict = "IdentityDetector.ReversedPeopleDict"
  22. ConfigIdentityDetectorPeopleDictPath = "IdentityDetector.PeopleDictPath"
  23. FactIdentityDetectorPeopleCount = "IdentityDetector.PeopleCount"
  24. DependencyAuthor = "author"
  25. )
  26. func (id *IdentityDetector) Name() string {
  27. return "IdentityDetector"
  28. }
  29. func (id *IdentityDetector) Provides() []string {
  30. arr := [...]string{DependencyAuthor}
  31. return arr[:]
  32. }
  33. func (id *IdentityDetector) Requires() []string {
  34. return []string{}
  35. }
  36. func (id *IdentityDetector) ListConfigurationOptions() []ConfigurationOption {
  37. options := [...]ConfigurationOption{{
  38. Name: ConfigIdentityDetectorPeopleDictPath,
  39. Description: "Path to the developers' email associations.",
  40. Flag: "people-dict",
  41. Type: StringConfigurationOption,
  42. Default: ""},
  43. }
  44. return options[:]
  45. }
  46. func (id *IdentityDetector) Configure(facts map[string]interface{}) {
  47. if val, exists := facts[FactIdentityDetectorPeopleDict].(map[string]int); exists {
  48. id.PeopleDict = val
  49. }
  50. if val, exists := facts[FactIdentityDetectorReversedPeopleDict].([]string); exists {
  51. id.ReversedPeopleDict = val
  52. }
  53. if id.PeopleDict == nil || id.ReversedPeopleDict == nil {
  54. peopleDictPath, _ := facts[ConfigIdentityDetectorPeopleDictPath].(string)
  55. if peopleDictPath != "" {
  56. id.LoadPeopleDict(peopleDictPath)
  57. facts[FactIdentityDetectorPeopleCount] = len(id.ReversedPeopleDict) - 1
  58. } else {
  59. if _, exists := facts[FactPipelineCommits]; !exists {
  60. panic("IdentityDetector needs a list of commits to initialize.")
  61. }
  62. id.GeneratePeopleDict(facts[FactPipelineCommits].([]*object.Commit))
  63. facts[FactIdentityDetectorPeopleCount] = len(id.ReversedPeopleDict)
  64. }
  65. } else {
  66. facts[FactIdentityDetectorPeopleCount] = len(id.ReversedPeopleDict)
  67. }
  68. facts[FactIdentityDetectorPeopleDict] = id.PeopleDict
  69. facts[FactIdentityDetectorReversedPeopleDict] = id.ReversedPeopleDict
  70. }
  71. func (id *IdentityDetector) Initialize(repository *git.Repository) {
  72. }
  73. func (self *IdentityDetector) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  74. commit := deps["commit"].(*object.Commit)
  75. signature := commit.Author
  76. id, exists := self.PeopleDict[strings.ToLower(signature.Email)]
  77. if !exists {
  78. id, exists = self.PeopleDict[strings.ToLower(signature.Name)]
  79. if !exists {
  80. id = MISSING_AUTHOR
  81. }
  82. }
  83. return map[string]interface{}{DependencyAuthor: id}, nil
  84. }
  85. func (id *IdentityDetector) LoadPeopleDict(path string) error {
  86. file, err := os.Open(path)
  87. if err != nil {
  88. return err
  89. }
  90. defer file.Close()
  91. scanner := bufio.NewScanner(file)
  92. dict := make(map[string]int)
  93. reverse_dict := []string{}
  94. size := 0
  95. for scanner.Scan() {
  96. ids := strings.Split(scanner.Text(), "|")
  97. for _, id := range ids {
  98. dict[strings.ToLower(id)] = size
  99. }
  100. reverse_dict = append(reverse_dict, ids[0])
  101. size += 1
  102. }
  103. reverse_dict = append(reverse_dict, UNMATCHED_AUTHOR)
  104. id.PeopleDict = dict
  105. id.ReversedPeopleDict = reverse_dict
  106. return nil
  107. }
  108. func (id *IdentityDetector) GeneratePeopleDict(commits []*object.Commit) {
  109. dict := map[string]int{}
  110. emails := map[int][]string{}
  111. names := map[int][]string{}
  112. size := 0
  113. mailmapFile, err := commits[len(commits)-1].File(".mailmap")
  114. if err == nil {
  115. mailMapContents, err := mailmapFile.Contents()
  116. if err == nil {
  117. mailmap := ParseMailmap(mailMapContents)
  118. for key, val := range mailmap {
  119. key = strings.ToLower(key)
  120. toEmail := strings.ToLower(val.Email)
  121. toName := strings.ToLower(val.Name)
  122. id, exists := dict[toEmail]
  123. if !exists {
  124. id, exists = dict[toName]
  125. }
  126. if exists {
  127. dict[key] = id
  128. } else {
  129. id = size
  130. size++
  131. if toEmail != "" {
  132. dict[toEmail] = id
  133. emails[id] = append(emails[id], toEmail)
  134. }
  135. if toName != "" {
  136. dict[toName] = id
  137. names[id] = append(names[id], toName)
  138. }
  139. dict[key] = id
  140. }
  141. if strings.Contains(key, "@") {
  142. exists := false
  143. for _, val := range emails[id] {
  144. if key == val {
  145. exists = true
  146. break
  147. }
  148. }
  149. if !exists {
  150. emails[id] = append(emails[id], key)
  151. }
  152. } else {
  153. exists := false
  154. for _, val := range names[id] {
  155. if key == val {
  156. exists = true
  157. break
  158. }
  159. }
  160. if !exists {
  161. names[id] = append(names[id], key)
  162. }
  163. }
  164. }
  165. }
  166. }
  167. for _, commit := range commits {
  168. email := strings.ToLower(commit.Author.Email)
  169. name := strings.ToLower(commit.Author.Name)
  170. id, exists := dict[email]
  171. if exists {
  172. _, exists := dict[name]
  173. if !exists {
  174. dict[name] = id
  175. names[id] = append(names[id], name)
  176. }
  177. continue
  178. }
  179. id, exists = dict[name]
  180. if exists {
  181. dict[email] = id
  182. emails[id] = append(emails[id], email)
  183. continue
  184. }
  185. dict[email] = size
  186. dict[name] = size
  187. emails[size] = append(emails[size], email)
  188. names[size] = append(names[size], name)
  189. size += 1
  190. }
  191. reverse_dict := make([]string, size)
  192. for _, val := range dict {
  193. sort.Strings(names[val])
  194. sort.Strings(emails[val])
  195. reverse_dict[val] = strings.Join(names[val], "|") + "|" + strings.Join(emails[val], "|")
  196. }
  197. id.PeopleDict = dict
  198. id.ReversedPeopleDict = reverse_dict
  199. }
  200. // MergeReversedDicts joins two identity lists together, excluding duplicates, in-order.
  201. func (_ IdentityDetector) MergeReversedDicts(rd1, rd2 []string) (map[string][3]int, []string) {
  202. people := map[string][3]int{}
  203. for i, pid := range rd1 {
  204. ptrs := people[pid]
  205. ptrs[0] = len(people)
  206. ptrs[1] = i
  207. ptrs[2] = -1
  208. people[pid] = ptrs
  209. }
  210. for i, pid := range rd2 {
  211. ptrs, exists := people[pid]
  212. if !exists {
  213. ptrs[0] = len(people)
  214. ptrs[1] = -1
  215. }
  216. ptrs[2] = i
  217. people[pid] = ptrs
  218. }
  219. mrd := make([]string, len(people))
  220. for name, ptrs := range people {
  221. mrd[ptrs[0]] = name
  222. }
  223. return people, mrd
  224. }
  225. func init() {
  226. Registry.Register(&IdentityDetector{})
  227. }