pipeline.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571
  1. package hercules
  2. import (
  3. "bufio"
  4. "errors"
  5. "flag"
  6. "fmt"
  7. "io"
  8. "io/ioutil"
  9. "os"
  10. "reflect"
  11. "strings"
  12. "unsafe"
  13. "gopkg.in/src-d/go-git.v4"
  14. "gopkg.in/src-d/go-git.v4/plumbing"
  15. "gopkg.in/src-d/go-git.v4/plumbing/object"
  16. "gopkg.in/src-d/hercules.v3/toposort"
  17. "sort"
  18. )
  19. type ConfigurationOptionType int
  20. const (
  21. // Boolean value type.
  22. BoolConfigurationOption ConfigurationOptionType = iota
  23. // Integer value type.
  24. IntConfigurationOption
  25. // String value type.
  26. StringConfigurationOption
  27. )
  28. // ConfigurationOption allows for the unified, retrospective way to setup PipelineItem-s.
  29. type ConfigurationOption struct {
  30. // Name identifies the configuration option in facts.
  31. Name string
  32. // Description represents the help text about the configuration option.
  33. Description string
  34. // Flag corresponds to the CLI token with "-" prepended.
  35. Flag string
  36. // Type specifies the kind of the configuration option's value.
  37. Type ConfigurationOptionType
  38. // Default is the initial value of the configuration option.
  39. Default interface{}
  40. }
  41. // PipelineItem is the interface for all the units of the Git commit analysis pipeline.
  42. type PipelineItem interface {
  43. // Name returns the name of the analysis.
  44. Name() string
  45. // Provides returns the list of keys of reusable calculated entities.
  46. // Other items may depend on them.
  47. Provides() []string
  48. // Requires returns the list of keys of needed entities which must be supplied in Consume().
  49. Requires() []string
  50. // ListConfigurationOptions returns the list of available options which can be consumed by Configure().
  51. ListConfigurationOptions() []ConfigurationOption
  52. // Configure performs the initial setup of the object by applying parameters from facts.
  53. // It allows to create PipelineItems in a universal way.
  54. Configure(facts map[string]interface{})
  55. // Initialize prepares and resets the item. Consume() requires Initialize()
  56. // to be called at least once beforehand.
  57. Initialize(*git.Repository)
  58. // Consume processes the next commit.
  59. // deps contains the required entities which match Depends(). Besides, it always includes
  60. // "commit" and "index".
  61. // Returns the calculated entities which match Provides().
  62. Consume(deps map[string]interface{}) (map[string]interface{}, error)
  63. }
  64. // FeaturedPipelineItem enables switching the automatic insertion of pipeline items on or off.
  65. type FeaturedPipelineItem interface {
  66. PipelineItem
  67. // Features returns the list of names which enable this item to be automatically inserted
  68. // in Pipeline.DeployItem().
  69. Features() []string
  70. }
  71. // LeafPipelineItem corresponds to the top level pipeline items which produce the end results.
  72. type LeafPipelineItem interface {
  73. PipelineItem
  74. // Flag returns the cmdline name of the item.
  75. Flag() string
  76. // Finalize returns the result of the analysis.
  77. Finalize() interface{}
  78. // Serialize encodes the object returned by Finalize() to Text or Protocol Buffers.
  79. Serialize(result interface{}, binary bool, writer io.Writer) error
  80. }
  81. // PipelineItemRegistry contains all the known PipelineItem-s.
  82. type PipelineItemRegistry struct {
  83. provided map[string][]reflect.Type
  84. registered map[string]reflect.Type
  85. flags map[string]reflect.Type
  86. }
  87. // Register adds another PipelineItem to the registry.
  88. func (registry *PipelineItemRegistry) Register(example PipelineItem) {
  89. t := reflect.TypeOf(example)
  90. registry.registered[example.Name()] = t
  91. if fpi, ok := interface{}(example).(LeafPipelineItem); ok {
  92. registry.flags[fpi.Flag()] = t
  93. }
  94. for _, dep := range example.Provides() {
  95. ts := registry.provided[dep]
  96. if ts == nil {
  97. ts = []reflect.Type{}
  98. }
  99. ts = append(ts, t)
  100. registry.provided[dep] = ts
  101. }
  102. }
  103. func (registry *PipelineItemRegistry) Summon(providesOrName string) []PipelineItem {
  104. if registry.provided == nil {
  105. return []PipelineItem{}
  106. }
  107. ts := registry.provided[providesOrName]
  108. items := []PipelineItem{}
  109. for _, t := range ts {
  110. items = append(items, reflect.New(t.Elem()).Interface().(PipelineItem))
  111. }
  112. if t, exists := registry.registered[providesOrName]; exists {
  113. items = append(items, reflect.New(t.Elem()).Interface().(PipelineItem))
  114. }
  115. return items
  116. }
  117. type arrayFeatureFlags struct {
  118. // Flags containts the features activated through the command line.
  119. Flags []string
  120. // Choices contains all registered features.
  121. Choices map[string]bool
  122. }
  123. func (acf *arrayFeatureFlags) String() string {
  124. return strings.Join([]string(acf.Flags), ", ")
  125. }
  126. func (acf *arrayFeatureFlags) Set(value string) error {
  127. if _, exists := acf.Choices[value]; !exists {
  128. return errors.New(fmt.Sprintf("Feature \"%s\" is not registered.", value))
  129. }
  130. acf.Flags = append(acf.Flags, value)
  131. return nil
  132. }
  133. var featureFlags = arrayFeatureFlags{Flags: []string{}, Choices: map[string]bool{}}
  134. // AddFlags inserts the cmdline options from PipelineItem.ListConfigurationOptions(),
  135. // FeaturedPipelineItem().Features() and LeafPipelineItem.Flag() into the global "flag" parser
  136. // built into the Go runtime.
  137. // Returns the "facts" which can be fed into PipelineItem.Configure() and the dictionary of
  138. // runnable analysis (LeafPipelineItem) choices. E.g. if "BurndownAnalysis" was activated
  139. // through "-burndown" cmdline argument, this mapping would contain ["BurndownAnalysis"] = *true.
  140. func (registry *PipelineItemRegistry) AddFlags() (map[string]interface{}, map[string]*bool) {
  141. flags := map[string]interface{}{}
  142. deployed := map[string]*bool{}
  143. for name, it := range registry.registered {
  144. formatHelp := func(desc string) string {
  145. return fmt.Sprintf("%s [%s]", desc, name)
  146. }
  147. itemIface := reflect.New(it.Elem()).Interface()
  148. for _, opt := range itemIface.(PipelineItem).ListConfigurationOptions() {
  149. var iface interface{}
  150. switch opt.Type {
  151. case BoolConfigurationOption:
  152. iface = interface{}(true)
  153. ptr := (**bool)(unsafe.Pointer(uintptr(unsafe.Pointer(&iface)) + unsafe.Sizeof(&iface)))
  154. *ptr = flag.Bool(opt.Flag, opt.Default.(bool), formatHelp(opt.Description))
  155. case IntConfigurationOption:
  156. iface = interface{}(0)
  157. ptr := (**int)(unsafe.Pointer(uintptr(unsafe.Pointer(&iface)) + unsafe.Sizeof(&iface)))
  158. *ptr = flag.Int(opt.Flag, opt.Default.(int), formatHelp(opt.Description))
  159. case StringConfigurationOption:
  160. iface = interface{}("")
  161. ptr := (**string)(unsafe.Pointer(uintptr(unsafe.Pointer(&iface)) + unsafe.Sizeof(&iface)))
  162. *ptr = flag.String(opt.Flag, opt.Default.(string), formatHelp(opt.Description))
  163. }
  164. flags[opt.Name] = iface
  165. }
  166. if fpi, ok := itemIface.(FeaturedPipelineItem); ok {
  167. for _, f := range fpi.Features() {
  168. featureFlags.Choices[f] = true
  169. }
  170. }
  171. if fpi, ok := itemIface.(LeafPipelineItem); ok {
  172. deployed[fpi.Name()] = flag.Bool(
  173. fpi.Flag(), false, fmt.Sprintf("Runs %s analysis.", fpi.Name()))
  174. }
  175. }
  176. features := []string{}
  177. for f := range featureFlags.Choices {
  178. features = append(features, f)
  179. }
  180. flag.Var(&featureFlags, "feature",
  181. fmt.Sprintf("Enables specific analysis features, can be specified "+
  182. "multiple times. Available features: [%s].", strings.Join(features, ", ")))
  183. return flags, deployed
  184. }
  185. // Registry contains all known pipeline item types.
  186. var Registry = &PipelineItemRegistry{
  187. provided: map[string][]reflect.Type{},
  188. registered: map[string]reflect.Type{},
  189. flags: map[string]reflect.Type{},
  190. }
  191. type wrappedPipelineItem struct {
  192. Item PipelineItem
  193. Children []wrappedPipelineItem
  194. }
  195. type Pipeline struct {
  196. // OnProgress is the callback which is invoked in Analyse() to output it's
  197. // progress. The first argument is the number of processed commits and the
  198. // second is the total number of commits.
  199. OnProgress func(int, int)
  200. // Repository points to the analysed Git repository struct from go-git.
  201. repository *git.Repository
  202. // Items are the registered building blocks in the pipeline. The order defines the
  203. // execution sequence.
  204. items []PipelineItem
  205. // The collection of parameters to create items.
  206. facts map[string]interface{}
  207. // Feature flags which enable the corresponding items.
  208. features map[string]bool
  209. }
  210. const FactPipelineCommits = "commits"
  211. func NewPipeline(repository *git.Repository) *Pipeline {
  212. return &Pipeline{
  213. repository: repository,
  214. items: []PipelineItem{},
  215. facts: map[string]interface{}{},
  216. features: map[string]bool{},
  217. }
  218. }
  219. func (pipeline *Pipeline) GetFact(name string) interface{} {
  220. return pipeline.facts[name]
  221. }
  222. func (pipeline *Pipeline) SetFact(name string, value interface{}) {
  223. pipeline.facts[name] = value
  224. }
  225. func (pipeline *Pipeline) GetFeature(name string) (bool, bool) {
  226. val, exists := pipeline.features[name]
  227. return val, exists
  228. }
  229. func (pipeline *Pipeline) SetFeature(name string) {
  230. pipeline.features[name] = true
  231. }
  232. func (pipeline *Pipeline) SetFeaturesFromFlags() {
  233. for _, feature := range featureFlags.Flags {
  234. pipeline.SetFeature(feature)
  235. }
  236. }
  237. func (pipeline *Pipeline) DeployItem(item PipelineItem) PipelineItem {
  238. queue := []PipelineItem{}
  239. queue = append(queue, item)
  240. added := map[string]PipelineItem{}
  241. for _, item := range pipeline.items {
  242. added[item.Name()] = item
  243. }
  244. added[item.Name()] = item
  245. pipeline.AddItem(item)
  246. for len(queue) > 0 {
  247. head := queue[0]
  248. queue = queue[1:]
  249. for _, dep := range head.Requires() {
  250. for _, sibling := range Registry.Summon(dep) {
  251. if _, exists := added[sibling.Name()]; !exists {
  252. disabled := false
  253. // If this item supports features, check them against the activated in pipeline.features
  254. if fpi, matches := interface{}(sibling).(FeaturedPipelineItem); matches {
  255. for _, feature := range fpi.Features() {
  256. if !pipeline.features[feature] {
  257. disabled = true
  258. break
  259. }
  260. }
  261. }
  262. if disabled {
  263. continue
  264. }
  265. added[sibling.Name()] = sibling
  266. queue = append(queue, sibling)
  267. pipeline.AddItem(sibling)
  268. }
  269. }
  270. }
  271. }
  272. return item
  273. }
  274. func (pipeline *Pipeline) AddItem(item PipelineItem) PipelineItem {
  275. pipeline.items = append(pipeline.items, item)
  276. return item
  277. }
  278. func (pipeline *Pipeline) RemoveItem(item PipelineItem) {
  279. for i, reg := range pipeline.items {
  280. if reg == item {
  281. pipeline.items = append(pipeline.items[:i], pipeline.items[i+1:]...)
  282. return
  283. }
  284. }
  285. }
  286. func (pipeline *Pipeline) Len() int {
  287. return len(pipeline.items)
  288. }
  289. // Commits returns the critical path in the repository's history. It starts
  290. // from HEAD and traces commits backwards till the root. When it encounters
  291. // a merge (more than one parent), it always chooses the first parent.
  292. func (pipeline *Pipeline) Commits() []*object.Commit {
  293. result := []*object.Commit{}
  294. repository := pipeline.repository
  295. head, err := repository.Head()
  296. if err != nil {
  297. panic(err)
  298. }
  299. commit, err := repository.CommitObject(head.Hash())
  300. if err != nil {
  301. panic(err)
  302. }
  303. // the first parent matches the head
  304. for ; err != io.EOF; commit, err = commit.Parents().Next() {
  305. if err != nil {
  306. panic(err)
  307. }
  308. result = append(result, commit)
  309. }
  310. // reverse the order
  311. for i, j := 0, len(result)-1; i < j; i, j = i+1, j-1 {
  312. result[i], result[j] = result[j], result[i]
  313. }
  314. return result
  315. }
  316. type sortablePipelineItems []PipelineItem
  317. func (items sortablePipelineItems) Len() int {
  318. return len(items)
  319. }
  320. func (items sortablePipelineItems) Less(i, j int) bool {
  321. return items[i].Name() < items[j].Name()
  322. }
  323. func (items sortablePipelineItems) Swap(i, j int) {
  324. items[i], items[j] = items[j], items[i]
  325. }
  326. func (pipeline *Pipeline) resolve(dumpPath string) {
  327. graph := toposort.NewGraph()
  328. sort.Sort(sortablePipelineItems(pipeline.items))
  329. name2item := map[string]PipelineItem{}
  330. ambiguousMap := map[string][]string{}
  331. nameUsages := map[string]int{}
  332. for _, item := range pipeline.items {
  333. nameUsages[item.Name()]++
  334. }
  335. counters := map[string]int{}
  336. for _, item := range pipeline.items {
  337. name := item.Name()
  338. if nameUsages[name] > 1 {
  339. index := counters[item.Name()] + 1
  340. counters[item.Name()] = index
  341. name = fmt.Sprintf("%s_%d", item.Name(), index)
  342. }
  343. graph.AddNode(name)
  344. name2item[name] = item
  345. for _, key := range item.Provides() {
  346. key = "[" + key + "]"
  347. graph.AddNode(key)
  348. if graph.AddEdge(name, key) > 1 {
  349. if ambiguousMap[key] != nil {
  350. fmt.Fprintln(os.Stderr, "Pipeline:")
  351. for _, item2 := range pipeline.items {
  352. if item2 == item {
  353. fmt.Fprint(os.Stderr, "> ")
  354. }
  355. fmt.Fprint(os.Stderr, item2.Name(), " [")
  356. for i, key2 := range item2.Provides() {
  357. fmt.Fprint(os.Stderr, key2)
  358. if i < len(item.Provides()) - 1 {
  359. fmt.Fprint(os.Stderr, ", ")
  360. }
  361. }
  362. fmt.Fprintln(os.Stderr, "]")
  363. }
  364. panic("Failed to resolve pipeline dependencies: ambiguous graph.")
  365. }
  366. ambiguousMap[key] = graph.FindParents(key)
  367. }
  368. }
  369. }
  370. counters = map[string]int{}
  371. for _, item := range pipeline.items {
  372. name := item.Name()
  373. if nameUsages[name] > 1 {
  374. index := counters[item.Name()] + 1
  375. counters[item.Name()] = index
  376. name = fmt.Sprintf("%s_%d", item.Name(), index)
  377. }
  378. for _, key := range item.Requires() {
  379. key = "[" + key + "]"
  380. if graph.AddEdge(key, name) == 0 {
  381. panic(fmt.Sprintf("Unsatisfied dependency: %s -> %s", key, item.Name()))
  382. }
  383. }
  384. }
  385. if len(ambiguousMap) > 0 {
  386. ambiguous := []string{}
  387. for key := range ambiguousMap {
  388. ambiguous = append(ambiguous, key)
  389. }
  390. bfsorder := graph.BreadthSort()
  391. bfsindex := map[string]int{}
  392. for i, s := range bfsorder {
  393. bfsindex[s] = i
  394. }
  395. for len(ambiguous) > 0 {
  396. key := ambiguous[0]
  397. ambiguous = ambiguous[1:]
  398. pair := ambiguousMap[key]
  399. inheritor := pair[1]
  400. if bfsindex[pair[1]] < bfsindex[pair[0]] {
  401. inheritor = pair[0]
  402. }
  403. removed := graph.RemoveEdge(key, inheritor)
  404. cycle := map[string]bool{}
  405. for _, node := range graph.FindCycle(key) {
  406. cycle[node] = true
  407. }
  408. if len(cycle) == 0 {
  409. cycle[inheritor] = true
  410. }
  411. if removed {
  412. graph.AddEdge(key, inheritor)
  413. }
  414. graph.RemoveEdge(inheritor, key)
  415. graph.ReindexNode(inheritor)
  416. // for all nodes key links to except those in cycle, put the link from inheritor
  417. for _, node := range graph.FindChildren(key) {
  418. if _, exists := cycle[node]; !exists {
  419. graph.AddEdge(inheritor, node)
  420. graph.RemoveEdge(key, node)
  421. }
  422. }
  423. graph.ReindexNode(key)
  424. }
  425. }
  426. var graphCopy *toposort.Graph
  427. if dumpPath != "" {
  428. graphCopy = graph.Copy()
  429. }
  430. strplan, ok := graph.Toposort()
  431. if !ok {
  432. panic("Failed to resolve pipeline dependencies: unable to topologically sort the items.")
  433. }
  434. pipeline.items = make([]PipelineItem, 0, len(pipeline.items))
  435. for _, key := range strplan {
  436. if item, ok := name2item[key]; ok {
  437. pipeline.items = append(pipeline.items, item)
  438. }
  439. }
  440. if dumpPath != "" {
  441. ioutil.WriteFile(dumpPath, []byte(graphCopy.Serialize(strplan)), 0666)
  442. }
  443. }
  444. func (pipeline *Pipeline) Initialize(facts map[string]interface{}) {
  445. if facts == nil {
  446. facts = map[string]interface{}{}
  447. }
  448. if _, exists := facts[FactPipelineCommits]; !exists {
  449. facts[FactPipelineCommits] = pipeline.Commits()
  450. }
  451. dumpPath, _ := facts["Pipeline.DumpPath"].(string)
  452. pipeline.resolve(dumpPath)
  453. if dryRun, _ := facts["Pipeline.DryRun"].(bool); dryRun {
  454. return
  455. }
  456. for _, item := range pipeline.items {
  457. item.Configure(facts)
  458. }
  459. for _, item := range pipeline.items {
  460. item.Initialize(pipeline.repository)
  461. }
  462. }
  463. // Run executes the pipeline.
  464. //
  465. // commits is a slice with the sequential commit history. It shall start from
  466. // the root (ascending order).
  467. func (pipeline *Pipeline) Run(commits []*object.Commit) (map[PipelineItem]interface{}, error) {
  468. onProgress := pipeline.OnProgress
  469. if onProgress == nil {
  470. onProgress = func(int, int) {}
  471. }
  472. for index, commit := range commits {
  473. onProgress(index, len(commits))
  474. state := map[string]interface{}{"commit": commit, "index": index}
  475. for _, item := range pipeline.items {
  476. update, err := item.Consume(state)
  477. if err != nil {
  478. fmt.Fprintf(os.Stderr, "%s failed on commit #%d %s\n",
  479. item.Name(), index, commit.Hash.String())
  480. return nil, err
  481. }
  482. for _, key := range item.Provides() {
  483. val, ok := update[key]
  484. if !ok {
  485. panic(fmt.Sprintf("%s: Consume() did not return %s", item.Name(), key))
  486. }
  487. state[key] = val
  488. }
  489. }
  490. }
  491. onProgress(len(commits), len(commits))
  492. result := map[PipelineItem]interface{}{}
  493. for _, item := range pipeline.items {
  494. if fpi, ok := interface{}(item).(LeafPipelineItem); ok {
  495. result[item] = fpi.Finalize()
  496. }
  497. }
  498. return result, nil
  499. }
  500. func LoadCommitsFromFile(path string, repository *git.Repository) ([]*object.Commit, error) {
  501. var file io.ReadCloser
  502. if path != "-" {
  503. var err error
  504. file, err = os.Open(path)
  505. if err != nil {
  506. return nil, err
  507. }
  508. defer file.Close()
  509. } else {
  510. file = os.Stdin
  511. }
  512. scanner := bufio.NewScanner(file)
  513. commits := []*object.Commit{}
  514. for scanner.Scan() {
  515. hash := plumbing.NewHash(scanner.Text())
  516. if len(hash) != 20 {
  517. return nil, errors.New("invalid commit hash " + scanner.Text())
  518. }
  519. commit, err := repository.CommitObject(hash)
  520. if err != nil {
  521. return nil, err
  522. }
  523. commits = append(commits, commit)
  524. }
  525. return commits, nil
  526. }