pipeline.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519
  1. package hercules
  2. import (
  3. "bufio"
  4. "errors"
  5. "flag"
  6. "fmt"
  7. "io"
  8. "io/ioutil"
  9. "os"
  10. "reflect"
  11. "strings"
  12. "unsafe"
  13. "gopkg.in/src-d/go-git.v4"
  14. "gopkg.in/src-d/go-git.v4/plumbing"
  15. "gopkg.in/src-d/go-git.v4/plumbing/object"
  16. "gopkg.in/src-d/hercules.v2/toposort"
  17. )
  18. type ConfigurationOptionType int
  19. const (
  20. // Boolean value type.
  21. BoolConfigurationOption ConfigurationOptionType = iota
  22. // Integer value type.
  23. IntConfigurationOption
  24. // String value type.
  25. StringConfigurationOption
  26. )
  27. // ConfigurationOption allows for the unified, retrospective way to setup PipelineItem-s.
  28. type ConfigurationOption struct {
  29. // Name identifies the configuration option in facts.
  30. Name string
  31. // Description represents the help text about the configuration option.
  32. Description string
  33. // Flag corresponds to the CLI token with "-" prepended.
  34. Flag string
  35. // Type specifies the kind of the configuration option's value.
  36. Type ConfigurationOptionType
  37. // Default is the initial value of the configuration option.
  38. Default interface{}
  39. }
  40. // PipelineItem is the interface for all the units of the Git commit analysis pipeline.
  41. type PipelineItem interface {
  42. // Name returns the name of the analysis.
  43. Name() string
  44. // Provides returns the list of keys of reusable calculated entities.
  45. // Other items may depend on them.
  46. Provides() []string
  47. // Requires returns the list of keys of needed entities which must be supplied in Consume().
  48. Requires() []string
  49. // ListConfigurationOptions returns the list of available options which can be consumed by Configure().
  50. ListConfigurationOptions() []ConfigurationOption
  51. // Configure performs the initial setup of the object by applying parameters from facts.
  52. // It allows to create PipelineItems in a universal way.
  53. Configure(facts map[string]interface{})
  54. // Initialize prepares and resets the item. Consume() requires Initialize()
  55. // to be called at least once beforehand.
  56. Initialize(*git.Repository)
  57. // Consume processes the next commit.
  58. // deps contains the required entities which match Depends(). Besides, it always includes
  59. // "commit" and "index".
  60. // Returns the calculated entities which match Provides().
  61. Consume(deps map[string]interface{}) (map[string]interface{}, error)
  62. }
  63. // FeaturedPipelineItem enables switching the automatic insertion of pipeline items on or off.
  64. type FeaturedPipelineItem interface {
  65. PipelineItem
  66. // Features returns the list of names which enable this item to be automatically inserted
  67. // in Pipeline.DeployItem().
  68. Features() []string
  69. }
  70. // FinalizedPipelineItem corresponds to the top level pipeline items which produce the end results.
  71. type FinalizedPipelineItem interface {
  72. PipelineItem
  73. // Finalize returns the result of the analysis.
  74. Finalize() interface{}
  75. // Flag returns the cmdline name of the item.
  76. Flag() string
  77. }
  78. type PipelineItemRegistry struct {
  79. provided map[string][]reflect.Type
  80. registered map[string]reflect.Type
  81. flags map[string]reflect.Type
  82. }
  83. func (registry *PipelineItemRegistry) Register(example PipelineItem) {
  84. t := reflect.TypeOf(example)
  85. registry.registered[example.Name()] = t
  86. if fpi, ok := interface{}(example).(FinalizedPipelineItem); ok {
  87. registry.flags[fpi.Flag()] = t
  88. }
  89. for _, dep := range example.Provides() {
  90. ts := registry.provided[dep]
  91. if ts == nil {
  92. ts = []reflect.Type{}
  93. }
  94. ts = append(ts, t)
  95. registry.provided[dep] = ts
  96. }
  97. }
  98. func (registry *PipelineItemRegistry) Summon(providesOrName string) []PipelineItem {
  99. if registry.provided == nil {
  100. return []PipelineItem{}
  101. }
  102. ts := registry.provided[providesOrName]
  103. items := []PipelineItem{}
  104. for _, t := range ts {
  105. items = append(items, reflect.New(t.Elem()).Interface().(PipelineItem))
  106. }
  107. if t, exists := registry.registered[providesOrName]; exists {
  108. items = append(items, reflect.New(t.Elem()).Interface().(PipelineItem))
  109. }
  110. return items
  111. }
  112. type arrayFeatureFlags struct {
  113. // Flags containts the features activated through the command line.
  114. Flags []string
  115. // Choices contains all registered features.
  116. Choices map[string]bool
  117. }
  118. func (acf *arrayFeatureFlags) String() string {
  119. return strings.Join([]string(acf.Flags), ", ")
  120. }
  121. func (acf *arrayFeatureFlags) Set(value string) error {
  122. if _, exists := acf.Choices[value]; !exists {
  123. return errors.New(fmt.Sprintf("Feature \"%s\" is not registered.", value))
  124. }
  125. acf.Flags = append(acf.Flags, value)
  126. return nil
  127. }
  128. var featureFlags = arrayFeatureFlags{Flags: []string{}, Choices: map[string]bool{}}
  129. func (registry *PipelineItemRegistry) AddFlags() (map[string]interface{}, map[string]*bool) {
  130. flags := map[string]interface{}{}
  131. deployed := map[string]*bool{}
  132. for name, it := range registry.registered {
  133. formatHelp := func(desc string) string {
  134. return fmt.Sprintf("%s [%s]", desc, name)
  135. }
  136. itemIface := reflect.New(it.Elem()).Interface()
  137. for _, opt := range itemIface.(PipelineItem).ListConfigurationOptions() {
  138. var iface interface{}
  139. switch opt.Type {
  140. case BoolConfigurationOption:
  141. iface = interface{}(true)
  142. ptr := (**bool)(unsafe.Pointer(uintptr(unsafe.Pointer(&iface)) + unsafe.Sizeof(&iface)))
  143. *ptr = flag.Bool(opt.Flag, opt.Default.(bool), formatHelp(opt.Description))
  144. case IntConfigurationOption:
  145. iface = interface{}(0)
  146. ptr := (**int)(unsafe.Pointer(uintptr(unsafe.Pointer(&iface)) + unsafe.Sizeof(&iface)))
  147. *ptr = flag.Int(opt.Flag, opt.Default.(int), formatHelp(opt.Description))
  148. case StringConfigurationOption:
  149. iface = interface{}("")
  150. ptr := (**string)(unsafe.Pointer(uintptr(unsafe.Pointer(&iface)) + unsafe.Sizeof(&iface)))
  151. *ptr = flag.String(opt.Flag, opt.Default.(string), formatHelp(opt.Description))
  152. }
  153. flags[opt.Name] = iface
  154. }
  155. if fpi, ok := itemIface.(FeaturedPipelineItem); ok {
  156. for _, f := range fpi.Features() {
  157. featureFlags.Choices[f] = true
  158. }
  159. }
  160. if fpi, ok := itemIface.(FinalizedPipelineItem); ok {
  161. deployed[fpi.Name()] = flag.Bool(
  162. fpi.Flag(), false, fmt.Sprintf("Runs %s analysis.", fpi.Name()))
  163. }
  164. }
  165. features := []string{}
  166. for f := range featureFlags.Choices {
  167. features = append(features, f)
  168. }
  169. flag.Var(&featureFlags, "feature",
  170. fmt.Sprintf("Enables specific analysis features, can be specified "+
  171. "multiple times. Available features: [%s].", strings.Join(features, ", ")))
  172. return flags, deployed
  173. }
  174. // Registry contains all known pipeline item types.
  175. var Registry = &PipelineItemRegistry{
  176. provided: map[string][]reflect.Type{},
  177. registered: map[string]reflect.Type{},
  178. flags: map[string]reflect.Type{},
  179. }
  180. type wrappedPipelineItem struct {
  181. Item PipelineItem
  182. Children []wrappedPipelineItem
  183. }
  184. type Pipeline struct {
  185. // OnProgress is the callback which is invoked in Analyse() to output it's
  186. // progress. The first argument is the number of processed commits and the
  187. // second is the total number of commits.
  188. OnProgress func(int, int)
  189. // repository points to the analysed Git repository struct from go-git.
  190. repository *git.Repository
  191. // items are the registered building blocks in the pipeline. The order defines the
  192. // execution sequence.
  193. items []PipelineItem
  194. // the collection of parameters to create items.
  195. facts map[string]interface{}
  196. // Feature flags which enable the corresponding items.
  197. features map[string]bool
  198. }
  199. func NewPipeline(repository *git.Repository) *Pipeline {
  200. return &Pipeline{
  201. repository: repository,
  202. items: []PipelineItem{},
  203. facts: map[string]interface{}{},
  204. features: map[string]bool{},
  205. }
  206. }
  207. func (pipeline *Pipeline) GetFact(name string) interface{} {
  208. return pipeline.facts[name]
  209. }
  210. func (pipeline *Pipeline) SetFact(name string, value interface{}) {
  211. pipeline.facts[name] = value
  212. }
  213. func (pipeline *Pipeline) GetFeature(name string) bool {
  214. return pipeline.features[name]
  215. }
  216. func (pipeline *Pipeline) SetFeature(name string) {
  217. pipeline.features[name] = true
  218. }
  219. func (pipeline *Pipeline) SetFeaturesFromFlags() {
  220. for _, feature := range featureFlags.Flags {
  221. pipeline.SetFeature(feature)
  222. }
  223. }
  224. func (pipeline *Pipeline) DeployItem(item PipelineItem) PipelineItem {
  225. queue := []PipelineItem{}
  226. queue = append(queue, item)
  227. added := map[string]PipelineItem{}
  228. added[item.Name()] = item
  229. pipeline.AddItem(item)
  230. for len(queue) > 0 {
  231. head := queue[0]
  232. queue = queue[1:]
  233. for _, dep := range head.Requires() {
  234. for _, sibling := range Registry.Summon(dep) {
  235. if _, exists := added[sibling.Name()]; !exists {
  236. disabled := false
  237. // If this item supports features, check them against the activated in pipeline.features
  238. if fpi, matches := interface{}(sibling).(FeaturedPipelineItem); matches {
  239. for _, feature := range fpi.Features() {
  240. if !pipeline.features[feature] {
  241. disabled = true
  242. break
  243. }
  244. }
  245. }
  246. if disabled {
  247. continue
  248. }
  249. added[sibling.Name()] = sibling
  250. queue = append(queue, sibling)
  251. pipeline.AddItem(sibling)
  252. }
  253. }
  254. }
  255. }
  256. return item
  257. }
  258. func (pipeline *Pipeline) AddItem(item PipelineItem) PipelineItem {
  259. pipeline.items = append(pipeline.items, item)
  260. return item
  261. }
  262. func (pipeline *Pipeline) RemoveItem(item PipelineItem) {
  263. for i, reg := range pipeline.items {
  264. if reg == item {
  265. pipeline.items = append(pipeline.items[:i], pipeline.items[i+1:]...)
  266. return
  267. }
  268. }
  269. }
  270. func (pipeline *Pipeline) Len() int {
  271. return len(pipeline.items)
  272. }
  273. // Commits returns the critical path in the repository's history. It starts
  274. // from HEAD and traces commits backwards till the root. When it encounters
  275. // a merge (more than one parent), it always chooses the first parent.
  276. func (pipeline *Pipeline) Commits() []*object.Commit {
  277. result := []*object.Commit{}
  278. repository := pipeline.repository
  279. head, err := repository.Head()
  280. if err != nil {
  281. panic(err)
  282. }
  283. commit, err := repository.CommitObject(head.Hash())
  284. if err != nil {
  285. panic(err)
  286. }
  287. // the first parent matches the head
  288. for ; err != io.EOF; commit, err = commit.Parents().Next() {
  289. if err != nil {
  290. panic(err)
  291. }
  292. result = append(result, commit)
  293. }
  294. // reverse the order
  295. for i, j := 0, len(result)-1; i < j; i, j = i+1, j-1 {
  296. result[i], result[j] = result[j], result[i]
  297. }
  298. return result
  299. }
  300. func (pipeline *Pipeline) resolve(dumpPath string) {
  301. graph := toposort.NewGraph()
  302. name2item := map[string]PipelineItem{}
  303. ambiguousMap := map[string][]string{}
  304. nameUsages := map[string]int{}
  305. for _, item := range pipeline.items {
  306. nameUsages[item.Name()]++
  307. }
  308. counters := map[string]int{}
  309. for _, item := range pipeline.items {
  310. name := item.Name()
  311. if nameUsages[name] > 1 {
  312. index := counters[item.Name()] + 1
  313. counters[item.Name()] = index
  314. name = fmt.Sprintf("%s_%d", item.Name(), index)
  315. }
  316. graph.AddNode(name)
  317. name2item[name] = item
  318. for _, key := range item.Provides() {
  319. key = "[" + key + "]"
  320. graph.AddNode(key)
  321. if graph.AddEdge(name, key) > 1 {
  322. if ambiguousMap[key] != nil {
  323. panic("Failed to resolve pipeline dependencies.")
  324. }
  325. ambiguousMap[key] = graph.FindParents(key)
  326. }
  327. }
  328. }
  329. counters = map[string]int{}
  330. for _, item := range pipeline.items {
  331. name := item.Name()
  332. if nameUsages[name] > 1 {
  333. index := counters[item.Name()] + 1
  334. counters[item.Name()] = index
  335. name = fmt.Sprintf("%s_%d", item.Name(), index)
  336. }
  337. for _, key := range item.Requires() {
  338. key = "[" + key + "]"
  339. if graph.AddEdge(key, name) == 0 {
  340. panic(fmt.Sprintf("Unsatisfied dependency: %s -> %s", key, item.Name()))
  341. }
  342. }
  343. }
  344. if len(ambiguousMap) > 0 {
  345. ambiguous := []string{}
  346. for key := range ambiguousMap {
  347. ambiguous = append(ambiguous, key)
  348. }
  349. bfsorder := graph.BreadthSort()
  350. bfsindex := map[string]int{}
  351. for i, s := range bfsorder {
  352. bfsindex[s] = i
  353. }
  354. for len(ambiguous) > 0 {
  355. key := ambiguous[0]
  356. ambiguous = ambiguous[1:]
  357. pair := ambiguousMap[key]
  358. inheritor := pair[1]
  359. if bfsindex[pair[1]] < bfsindex[pair[0]] {
  360. inheritor = pair[0]
  361. }
  362. removed := graph.RemoveEdge(key, inheritor)
  363. cycle := map[string]bool{}
  364. for _, node := range graph.FindCycle(key) {
  365. cycle[node] = true
  366. }
  367. if len(cycle) == 0 {
  368. cycle[inheritor] = true
  369. }
  370. if removed {
  371. graph.AddEdge(key, inheritor)
  372. }
  373. graph.RemoveEdge(inheritor, key)
  374. graph.ReindexNode(inheritor)
  375. // for all nodes key links to except those in cycle, put the link from inheritor
  376. for _, node := range graph.FindChildren(key) {
  377. if _, exists := cycle[node]; !exists {
  378. graph.AddEdge(inheritor, node)
  379. graph.RemoveEdge(key, node)
  380. }
  381. }
  382. graph.ReindexNode(key)
  383. }
  384. }
  385. var graphCopy *toposort.Graph
  386. if dumpPath != "" {
  387. graphCopy = graph.Copy()
  388. }
  389. strplan, ok := graph.Toposort()
  390. if !ok {
  391. panic("Failed to resolve pipeline dependencies.")
  392. }
  393. pipeline.items = make([]PipelineItem, 0, len(pipeline.items))
  394. for _, key := range strplan {
  395. if item, ok := name2item[key]; ok {
  396. pipeline.items = append(pipeline.items, item)
  397. }
  398. }
  399. if dumpPath != "" {
  400. ioutil.WriteFile(dumpPath, []byte(graphCopy.Serialize(strplan)), 0666)
  401. }
  402. }
  403. func (pipeline *Pipeline) Initialize(facts map[string]interface{}) {
  404. dumpPath, _ := facts["Pipeline.DumpPath"].(string)
  405. pipeline.resolve(dumpPath)
  406. if dryRun, _ := facts["Pipeline.DryRun"].(bool); dryRun {
  407. return
  408. }
  409. for _, item := range pipeline.items {
  410. item.Configure(facts)
  411. }
  412. for _, item := range pipeline.items {
  413. item.Initialize(pipeline.repository)
  414. }
  415. }
  416. // Run executes the pipeline.
  417. //
  418. // commits is a slice with the sequential commit history. It shall start from
  419. // the root (ascending order).
  420. func (pipeline *Pipeline) Run(commits []*object.Commit) (map[PipelineItem]interface{}, error) {
  421. onProgress := pipeline.OnProgress
  422. if onProgress == nil {
  423. onProgress = func(int, int) {}
  424. }
  425. for index, commit := range commits {
  426. onProgress(index, len(commits))
  427. state := map[string]interface{}{"commit": commit, "index": index}
  428. for _, item := range pipeline.items {
  429. update, err := item.Consume(state)
  430. if err != nil {
  431. fmt.Fprintf(os.Stderr, "%s failed on commit #%d %s\n",
  432. item.Name(), index, commit.Hash.String())
  433. return nil, err
  434. }
  435. for _, key := range item.Provides() {
  436. val, ok := update[key]
  437. if !ok {
  438. panic(fmt.Sprintf("%s: Consume() did not return %s", item.Name(), key))
  439. }
  440. state[key] = val
  441. }
  442. }
  443. }
  444. onProgress(len(commits), len(commits))
  445. result := map[PipelineItem]interface{}{}
  446. for _, item := range pipeline.items {
  447. if fpi, ok := interface{}(item).(FinalizedPipelineItem); ok {
  448. result[item] = fpi.Finalize()
  449. }
  450. }
  451. return result, nil
  452. }
  453. func LoadCommitsFromFile(path string, repository *git.Repository) ([]*object.Commit, error) {
  454. var file io.ReadCloser
  455. if path != "-" {
  456. var err error
  457. file, err = os.Open(path)
  458. if err != nil {
  459. return nil, err
  460. }
  461. defer file.Close()
  462. } else {
  463. file = os.Stdin
  464. }
  465. scanner := bufio.NewScanner(file)
  466. commits := []*object.Commit{}
  467. for scanner.Scan() {
  468. hash := plumbing.NewHash(scanner.Text())
  469. if len(hash) != 20 {
  470. return nil, errors.New("invalid commit hash " + scanner.Text())
  471. }
  472. commit, err := repository.CommitObject(hash)
  473. if err != nil {
  474. return nil, err
  475. }
  476. commits = append(commits, commit)
  477. }
  478. return commits, nil
  479. }