pipeline.go

package core

import (
    "bufio"
    "fmt"
    "io"
    "io/ioutil"
    "log"
    "os"
    "path/filepath"
    "sort"
    "strings"
    "time"

    "github.com/pkg/errors"
    "gopkg.in/src-d/go-git.v4"
    "gopkg.in/src-d/go-git.v4/plumbing"
    "gopkg.in/src-d/go-git.v4/plumbing/object"
    "gopkg.in/src-d/go-git.v4/plumbing/storer"
    "gopkg.in/src-d/hercules.v6/internal/pb"
    "gopkg.in/src-d/hercules.v6/internal/toposort"
)

// ConfigurationOptionType represents the possible types of a ConfigurationOption's value.
type ConfigurationOptionType int

const (
    // BoolConfigurationOption reflects the boolean value type.
    BoolConfigurationOption ConfigurationOptionType = iota
    // IntConfigurationOption reflects the integer value type.
    IntConfigurationOption
    // StringConfigurationOption reflects the string value type.
    StringConfigurationOption
    // FloatConfigurationOption reflects a floating point value type.
    FloatConfigurationOption
    // StringsConfigurationOption reflects the array of strings value type.
    StringsConfigurationOption
)

// String returns an empty string for the boolean type, "int" for integers, "float" for
// floating point values and "string" for strings and string arrays. It is used in the
// command line interface to show the argument's type.
func (opt ConfigurationOptionType) String() string {
    switch opt {
    case BoolConfigurationOption:
        return ""
    case IntConfigurationOption:
        return "int"
    case StringConfigurationOption:
        return "string"
    case FloatConfigurationOption:
        return "float"
    case StringsConfigurationOption:
        return "string"
    }
    log.Panicf("Invalid ConfigurationOptionType value %d", opt)
    return ""
}

// ConfigurationOption allows for the unified, retrospective way to set up PipelineItem-s.
type ConfigurationOption struct {
    // Name identifies the configuration option in facts.
    Name string
    // Description represents the help text about the configuration option.
    Description string
    // Flag corresponds to the CLI token with "--" prepended.
    Flag string
    // Type specifies the kind of the configuration option's value.
    Type ConfigurationOptionType
    // Default is the initial value of the configuration option.
    Default interface{}
}

// FormatDefault converts the default value of ConfigurationOption to string.
// Used in the command line interface to show the argument's default value.
func (opt ConfigurationOption) FormatDefault() string {
    if opt.Type == StringsConfigurationOption {
        return fmt.Sprintf("\"%s\"", strings.Join(opt.Default.([]string), ","))
    }
    if opt.Type != StringConfigurationOption {
        return fmt.Sprint(opt.Default)
    }
    return fmt.Sprintf("\"%s\"", opt.Default)
}
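
// A minimal sketch of declaring an option inside ListConfigurationOptions(); the
// "MyAnalysis.Granularity" name and the --granularity flag are hypothetical:
//
//	opt := ConfigurationOption{
//		Name:        "MyAnalysis.Granularity",
//		Description: "How many days are in a single band.",
//		Flag:        "granularity",
//		Type:        IntConfigurationOption,
//		Default:     30,
//	}
//	fmt.Println(opt.FormatDefault()) // prints: 30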

// PipelineItem is the interface for all the units in the Git commits analysis pipeline.
type PipelineItem interface {
    // Name returns the name of the analysis.
    Name() string
    // Provides returns the list of keys of reusable calculated entities.
    // Other items may depend on them.
    Provides() []string
    // Requires returns the list of keys of needed entities which must be supplied in Consume().
    Requires() []string
    // ListConfigurationOptions returns the list of available options which can be consumed by Configure().
    ListConfigurationOptions() []ConfigurationOption
    // Configure performs the initial setup of the object by applying parameters from facts.
    // It allows creating PipelineItems in a universal way.
    Configure(facts map[string]interface{}) error
    // Initialize prepares and resets the item. Consume() requires Initialize()
    // to be called at least once beforehand.
    Initialize(*git.Repository) error
    // Consume processes the next commit.
    // deps contains the required entities which match Requires(). Besides, it always includes
    // DependencyCommit and DependencyIndex.
    // Returns the calculated entities which match Provides().
    Consume(deps map[string]interface{}) (map[string]interface{}, error)
    // Fork clones the item the requested number of times. The data links between the clones
    // are up to the implementation. Needed to handle Git branches. See also Merge().
    // Returns a slice with `n` fresh clones. In other words, it does not include the original item.
    Fork(n int) []PipelineItem
    // Merge combines several branches together. Each is supposed to have been created with Fork().
    // The result is stored in the called item, thus this function returns nothing.
    // Merge() must update all the branches, not only self. When several branches merge, some of
    // them may continue to live, hence this requirement.
    Merge(branches []PipelineItem)
}
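
// A compressed sketch of a do-nothing item which satisfies PipelineItem; the name
// "noopItem" is purely illustrative and does not exist in the package:
//
//	type noopItem struct{}
//
//	func (noopItem) Name() string                                    { return "Noop" }
//	func (noopItem) Provides() []string                              { return nil }
//	func (noopItem) Requires() []string                              { return nil }
//	func (noopItem) ListConfigurationOptions() []ConfigurationOption { return nil }
//	func (noopItem) Configure(map[string]interface{}) error          { return nil }
//	func (noopItem) Initialize(*git.Repository) error                { return nil }
//	func (noopItem) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
//		return nil, nil
//	}
//	func (noopItem) Fork(n int) []PipelineItem {
//		clones := make([]PipelineItem, n)
//		for i := range clones {
//			clones[i] = noopItem{}
//		}
//		return clones
//	}
//	func (noopItem) Merge([]PipelineItem) {}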

// FeaturedPipelineItem enables switching the automatic insertion of pipeline items on or off.
type FeaturedPipelineItem interface {
    PipelineItem
    // Features returns the list of names which enable this item to be automatically inserted
    // in Pipeline.DeployItem().
    Features() []string
}

// LeafPipelineItem corresponds to the top level pipeline items which produce the end results.
type LeafPipelineItem interface {
    PipelineItem
    // Flag returns the cmdline switch to run the analysis. Should be dash-lower-case
    // without the leading dashes.
    Flag() string
    // Description returns the text which explains what the analysis is doing.
    // Should start with a capital letter and end with a dot.
    Description() string
    // Finalize returns the result of the analysis.
    Finalize() interface{}
    // Serialize encodes the object returned by Finalize() to YAML or Protocol Buffers.
    Serialize(result interface{}, binary bool, writer io.Writer) error
}

// ResultMergeablePipelineItem specifies the methods to combine several analysis results together.
type ResultMergeablePipelineItem interface {
    LeafPipelineItem
    // Deserialize loads the result from a Protocol Buffers blob.
    Deserialize(pbmessage []byte) (interface{}, error)
    // MergeResults joins two results together. Common-s are specified as the global state.
    MergeResults(r1, r2 interface{}, c1, c2 *CommonAnalysisResult) interface{}
}

// HibernateablePipelineItem is the interface to allow pipeline items to be frozen (compacted, unloaded)
// while they are not needed in the hosting branch.
type HibernateablePipelineItem interface {
    PipelineItem
    // Hibernate signals that the item is temporarily not needed and its memory can be optimized.
    Hibernate()
    // Boot signals that the item is needed again and must be de-hibernated.
    Boot()
}

// CommonAnalysisResult holds the information which is always extracted at Pipeline.Run().
type CommonAnalysisResult struct {
    // BeginTime is the time of the first commit in the analysed sequence.
    BeginTime int64
    // EndTime is the time of the last commit in the analysed sequence.
    EndTime int64
    // CommitsNumber is the number of commits in the analysed sequence.
    CommitsNumber int
    // RunTime is the duration of Pipeline.Run().
    RunTime time.Duration
    // RunTimePerItem is the time elapsed by each PipelineItem.
    RunTimePerItem map[string]float64
}

// BeginTimeAsTime converts the UNIX timestamp of the beginning to Go time.
func (car *CommonAnalysisResult) BeginTimeAsTime() time.Time {
    return time.Unix(car.BeginTime, 0)
}

// EndTimeAsTime converts the UNIX timestamp of the ending to Go time.
func (car *CommonAnalysisResult) EndTimeAsTime() time.Time {
    return time.Unix(car.EndTime, 0)
}

// Merge combines the CommonAnalysisResult with another one.
// We choose the earlier BeginTime, the later EndTime, sum the number of commits and the
// elapsed run times.
func (car *CommonAnalysisResult) Merge(other *CommonAnalysisResult) {
    if car.EndTime == 0 || other.BeginTime == 0 {
        panic("Merging with an uninitialized CommonAnalysisResult")
    }
    if other.BeginTime < car.BeginTime {
        car.BeginTime = other.BeginTime
    }
    if other.EndTime > car.EndTime {
        car.EndTime = other.EndTime
    }
    car.CommitsNumber += other.CommitsNumber
    car.RunTime += other.RunTime
    for key, val := range other.RunTimePerItem {
        car.RunTimePerItem[key] += val
    }
}

// FillMetadata copies the data to a Protobuf message.
func (car *CommonAnalysisResult) FillMetadata(meta *pb.Metadata) *pb.Metadata {
    meta.BeginUnixTime = car.BeginTime
    meta.EndUnixTime = car.EndTime
    meta.Commits = int32(car.CommitsNumber)
    meta.RunTime = car.RunTime.Nanoseconds() / 1e6
    meta.RunTimePerItem = car.RunTimePerItem
    return meta
}

// Metadata is defined in internal/pb/pb.pb.go - the header of the binary file.
type Metadata = pb.Metadata

// MetadataToCommonAnalysisResult copies the data from a Protobuf message.
func MetadataToCommonAnalysisResult(meta *Metadata) *CommonAnalysisResult {
    return &CommonAnalysisResult{
        BeginTime:      meta.BeginUnixTime,
        EndTime:        meta.EndUnixTime,
        CommitsNumber:  int(meta.Commits),
        RunTime:        time.Duration(meta.RunTime * 1e6),
        RunTimePerItem: meta.RunTimePerItem,
    }
}

// Pipeline is the core Hercules entity which carries several PipelineItems and executes them.
// See the extended example of how a Pipeline works in doc.go.
type Pipeline struct {
    // OnProgress is the callback which is invoked in Analyse() to output its
    // progress. The first argument is the number of completed steps and the
    // second is the total number of steps.
    OnProgress func(int, int)

    // DryRun indicates whether the items are not executed.
    DryRun bool

    // DumpPlan indicates whether to print the execution plan to stderr.
    DumpPlan bool

    // repository points to the analysed Git repository struct from go-git.
    repository *git.Repository

    // items are the registered building blocks in the pipeline. The order defines the
    // execution sequence.
    items []PipelineItem

    // facts is the collection of parameters to create items.
    facts map[string]interface{}

    // features are the feature flags which enable the corresponding items.
    features map[string]bool
}

const (
    // ConfigPipelineDAGPath is the name of the Pipeline configuration option (Pipeline.Initialize())
    // which enables saving the items DAG to the specified file.
    ConfigPipelineDAGPath = "Pipeline.DAGPath"
    // ConfigPipelineDryRun is the name of the Pipeline configuration option (Pipeline.Initialize())
    // which disables Configure() and Initialize() invocation on each PipelineItem during the
    // Pipeline initialization.
    // Subsequent Run() calls are going to fail. Useful together with ConfigPipelineDAGPath.
    ConfigPipelineDryRun = "Pipeline.DryRun"
    // ConfigPipelineCommits is the name of the Pipeline configuration option (Pipeline.Initialize())
    // which allows specifying a custom commit sequence. By default, Pipeline.Commits() is used.
    ConfigPipelineCommits = "Pipeline.Commits"
    // ConfigPipelineDumpPlan is the name of the Pipeline configuration option (Pipeline.Initialize())
    // which outputs the execution plan to stderr.
    ConfigPipelineDumpPlan = "Pipeline.DumpPlan"
    // DependencyCommit is the name of one of the three items in `deps` supplied to PipelineItem.Consume()
    // which always exists. It corresponds to the currently analyzed commit.
    DependencyCommit = "commit"
    // DependencyIndex is the name of one of the three items in `deps` supplied to PipelineItem.Consume()
    // which always exists. It corresponds to the currently analyzed commit's index.
    DependencyIndex = "index"
    // DependencyIsMerge is the name of one of the three items in `deps` supplied to PipelineItem.Consume()
    // which always exists. It indicates whether the analyzed commit is a merge commit.
    // Checking the number of parents is not correct - we remove the back edges during the DAG simplification.
    DependencyIsMerge = "is_merge"
)

// NewPipeline initializes a new instance of the Pipeline struct.
func NewPipeline(repository *git.Repository) *Pipeline {
    return &Pipeline{
        repository: repository,
        items:      []PipelineItem{},
        facts:      map[string]interface{}{},
        features:   map[string]bool{},
    }
}
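
// A minimal sketch of the whole lifecycle, assuming an opened go-git repository `repo` and
// a leaf analysis item `item` (both hypothetical names):
//
//	pipeline := NewPipeline(repo)
//	leaf := pipeline.DeployItem(item).(LeafPipelineItem)
//	facts := map[string]interface{}{}
//	if err := pipeline.Initialize(facts); err != nil {
//		log.Fatal(err)
//	}
//	// Initialize() fills in the default commit sequence when none was supplied.
//	commits := facts[ConfigPipelineCommits].([]*object.Commit)
//	results, err := pipeline.Run(commits)
//	if err != nil {
//		log.Fatal(err)
//	}
//	fmt.Println(results[leaf])                     // the leaf's Finalize() value
//	common := results[nil].(*CommonAnalysisResult) // always present
//	fmt.Println(common.CommitsNumber, common.RunTime)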

// GetFact returns the value of the fact with the specified name.
func (pipeline *Pipeline) GetFact(name string) interface{} {
    return pipeline.facts[name]
}

// SetFact sets the value of the fact with the specified name.
func (pipeline *Pipeline) SetFact(name string, value interface{}) {
    pipeline.facts[name] = value
}

// GetFeature returns the state of the feature with the specified name (enabled/disabled) and
// whether it exists. See also: FeaturedPipelineItem.
func (pipeline *Pipeline) GetFeature(name string) (bool, bool) {
    val, exists := pipeline.features[name]
    return val, exists
}

// SetFeature sets the value of the feature with the specified name.
// See also: FeaturedPipelineItem.
func (pipeline *Pipeline) SetFeature(name string) {
    pipeline.features[name] = true
}

// SetFeaturesFromFlags enables the features which were specified through the command line flags
// which belong to the given PipelineItemRegistry instance.
// See also: AddItem().
func (pipeline *Pipeline) SetFeaturesFromFlags(registry ...*PipelineItemRegistry) {
    var ffr *PipelineItemRegistry
    if len(registry) == 0 {
        ffr = Registry
    } else if len(registry) == 1 {
        ffr = registry[0]
    } else {
        panic("Zero or one registry is allowed to be passed.")
    }
    for _, feature := range ffr.featureFlags.Flags {
        pipeline.SetFeature(feature)
    }
}

// DeployItem inserts a PipelineItem into the pipeline. It also recursively creates all of its
// dependencies (PipelineItem.Requires()). Returns the same item as specified in the arguments.
func (pipeline *Pipeline) DeployItem(item PipelineItem) PipelineItem {
    fpi, ok := item.(FeaturedPipelineItem)
    if ok {
        for _, f := range fpi.Features() {
            pipeline.SetFeature(f)
        }
    }
    queue := []PipelineItem{}
    queue = append(queue, item)
    added := map[string]PipelineItem{}
    for _, item := range pipeline.items {
        added[item.Name()] = item
    }
    added[item.Name()] = item
    pipeline.AddItem(item)
    for len(queue) > 0 {
        head := queue[0]
        queue = queue[1:]
        for _, dep := range head.Requires() {
            for _, sibling := range Registry.Summon(dep) {
                if _, exists := added[sibling.Name()]; !exists {
                    disabled := false
                    // If this item supports features, check them against those activated in pipeline.features
                    if fpi, matches := sibling.(FeaturedPipelineItem); matches {
                        for _, feature := range fpi.Features() {
                            if !pipeline.features[feature] {
                                disabled = true
                                break
                            }
                        }
                    }
                    if disabled {
                        continue
                    }
                    added[sibling.Name()] = sibling
                    queue = append(queue, sibling)
                    pipeline.AddItem(sibling)
                }
            }
        }
    }
    return item
}

// AddItem inserts a PipelineItem into the pipeline. It does not check any dependencies.
// See also: DeployItem().
func (pipeline *Pipeline) AddItem(item PipelineItem) PipelineItem {
    pipeline.items = append(pipeline.items, item)
    return item
}

// RemoveItem deletes a PipelineItem from the pipeline. It leaves all the rest of the items intact.
func (pipeline *Pipeline) RemoveItem(item PipelineItem) {
    for i, reg := range pipeline.items {
        if reg == item {
            pipeline.items = append(pipeline.items[:i], pipeline.items[i+1:]...)
            return
        }
    }
}

// Len returns the number of items in the pipeline.
func (pipeline *Pipeline) Len() int {
    return len(pipeline.items)
}

// Commits returns the list of commits from the history similar to `git log` over the HEAD.
// `firstParent` specifies whether to leave only the first parent after each merge
// (`git log --first-parent`) - effectively decreasing the accuracy but increasing performance.
func (pipeline *Pipeline) Commits(firstParent bool) ([]*object.Commit, error) {
    var result []*object.Commit
    repository := pipeline.repository
    head, err := repository.Head()
    if err != nil {
        if err == plumbing.ErrReferenceNotFound {
            refs, errr := repository.References()
            if errr != nil {
                return nil, errors.Wrap(errr, "unable to list the references")
            }
            refs.ForEach(func(ref *plumbing.Reference) error {
                if strings.HasPrefix(ref.Name().String(), "refs/heads/HEAD/") {
                    head = ref
                    return storer.ErrStop
                }
                return nil
            })
        }
        if head == nil && err != nil {
            return nil, errors.Wrap(err, "unable to collect the commit history")
        }
    }
    if firstParent {
        commit, err := repository.CommitObject(head.Hash())
        if err != nil {
            panic(err)
        }
        // the first parent matches the head
        for ; err != io.EOF; commit, err = commit.Parents().Next() {
            if err != nil {
                panic(err)
            }
            result = append(result, commit)
        }
        // reverse the order
        for i, j := 0, len(result)-1; i < j; i, j = i+1, j-1 {
            result[i], result[j] = result[j], result[i]
        }
        return result, nil
    }
    cit, err := repository.Log(&git.LogOptions{From: head.Hash()})
    if err != nil {
        return nil, errors.Wrap(err, "unable to collect the commit history")
    }
    defer cit.Close()
    cit.ForEach(func(commit *object.Commit) error {
        result = append(result, commit)
        return nil
    })
    return result, nil
}
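
// A quick sketch of the accuracy/performance trade-off (the `pipeline` variable is assumed):
//
//	fast, _ := pipeline.Commits(true)   // follows only the first parent of each merge
//	full, _ := pipeline.Commits(false)  // complete `git log` over HEAD
//	fmt.Println(len(fast) <= len(full)) // the first-parent history is never longer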

type sortablePipelineItems []PipelineItem

func (items sortablePipelineItems) Len() int {
    return len(items)
}

func (items sortablePipelineItems) Less(i, j int) bool {
    return items[i].Name() < items[j].Name()
}

func (items sortablePipelineItems) Swap(i, j int) {
    items[i], items[j] = items[j], items[i]
}
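
// resolve rebuilds pipeline.items in topological dependency order. It maps every item and
// every provided entity to a node in a toposort graph, connects providers to consumers,
// breaks the ambiguities and cycles it knows how to handle, and optionally writes the
// resulting DAG to dumpPath. It panics if a dependency cannot be satisfied or the graph
// cannot be topologically sorted.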
func (pipeline *Pipeline) resolve(dumpPath string) {
    graph := toposort.NewGraph()
    sort.Sort(sortablePipelineItems(pipeline.items))
    name2item := map[string]PipelineItem{}
    ambiguousMap := map[string][]string{}
    nameUsages := map[string]int{}
    for _, item := range pipeline.items {
        nameUsages[item.Name()]++
    }
    counters := map[string]int{}
    for _, item := range pipeline.items {
        name := item.Name()
        if nameUsages[name] > 1 {
            index := counters[item.Name()] + 1
            counters[item.Name()] = index
            name = fmt.Sprintf("%s_%d", item.Name(), index)
        }
        graph.AddNode(name)
        name2item[name] = item
        for _, key := range item.Provides() {
            key = "[" + key + "]"
            graph.AddNode(key)
            if graph.AddEdge(name, key) > 1 {
                if ambiguousMap[key] != nil {
                    fmt.Fprintln(os.Stderr, "Pipeline:")
                    for _, item2 := range pipeline.items {
                        if item2 == item {
                            fmt.Fprint(os.Stderr, "> ")
                        }
                        fmt.Fprint(os.Stderr, item2.Name(), " [")
                        for i, key2 := range item2.Provides() {
                            fmt.Fprint(os.Stderr, key2)
                            if i < len(item2.Provides())-1 {
                                fmt.Fprint(os.Stderr, ", ")
                            }
                        }
                        fmt.Fprintln(os.Stderr, "]")
                    }
                    panic("Failed to resolve pipeline dependencies: ambiguous graph.")
                }
                ambiguousMap[key] = graph.FindParents(key)
            }
        }
    }
    counters = map[string]int{}
    for _, item := range pipeline.items {
        name := item.Name()
        if nameUsages[name] > 1 {
            index := counters[item.Name()] + 1
            counters[item.Name()] = index
            name = fmt.Sprintf("%s_%d", item.Name(), index)
        }
        for _, key := range item.Requires() {
            key = "[" + key + "]"
            if graph.AddEdge(key, name) == 0 {
                log.Panicf("Unsatisfied dependency: %s -> %s", key, item.Name())
            }
        }
    }
    // Try to break the cycles in some known scenarios.
    if len(ambiguousMap) > 0 {
        var ambiguous []string
        for key := range ambiguousMap {
            ambiguous = append(ambiguous, key)
        }
        sort.Strings(ambiguous)
        bfsorder := graph.BreadthSort()
        bfsindex := map[string]int{}
        for i, s := range bfsorder {
            bfsindex[s] = i
        }
        for len(ambiguous) > 0 {
            key := ambiguous[0]
            ambiguous = ambiguous[1:]
            pair := ambiguousMap[key]
            inheritor := pair[1]
            if bfsindex[pair[1]] < bfsindex[pair[0]] {
                inheritor = pair[0]
            }
            removed := graph.RemoveEdge(key, inheritor)
            cycle := map[string]bool{}
            for _, node := range graph.FindCycle(key) {
                cycle[node] = true
            }
            if len(cycle) == 0 {
                cycle[inheritor] = true
            }
            if removed {
                graph.AddEdge(key, inheritor)
            }
            graph.RemoveEdge(inheritor, key)
            graph.ReindexNode(inheritor)
            // for all nodes key links to except those in cycle, put the link from inheritor
            for _, node := range graph.FindChildren(key) {
                if _, exists := cycle[node]; !exists {
                    graph.AddEdge(inheritor, node)
                    graph.RemoveEdge(key, node)
                }
            }
            graph.ReindexNode(key)
        }
    }
    var graphCopy *toposort.Graph
    if dumpPath != "" {
        graphCopy = graph.Copy()
    }
    strplan, ok := graph.Toposort()
    if !ok {
        panic("Failed to resolve pipeline dependencies: unable to topologically sort the items.")
    }
    pipeline.items = make([]PipelineItem, 0, len(pipeline.items))
    for _, key := range strplan {
        if item, ok := name2item[key]; ok {
            pipeline.items = append(pipeline.items, item)
        }
    }
    if dumpPath != "" {
        // If there is a floating difference, uncomment this:
        // fmt.Fprint(os.Stderr, graphCopy.DebugDump())
        ioutil.WriteFile(dumpPath, []byte(graphCopy.Serialize(strplan)), 0666)
        absPath, _ := filepath.Abs(dumpPath)
        log.Printf("Wrote the DAG to %s\n", absPath)
    }
}

// Initialize prepares the pipeline for the execution (Run()). This function
// resolves the execution DAG, Configure()-s and Initialize()-s the items in it in the
// topological dependency order. `facts` are passed inside Configure(). They are mutable.
func (pipeline *Pipeline) Initialize(facts map[string]interface{}) error {
    if facts == nil {
        facts = map[string]interface{}{}
    }
    if _, exists := facts[ConfigPipelineCommits]; !exists {
        var err error
        facts[ConfigPipelineCommits], err = pipeline.Commits(false)
        if err != nil {
            log.Panicf("failed to list the commits: %v", err)
        }
    }
    dumpPath, _ := facts[ConfigPipelineDAGPath].(string)
    pipeline.resolve(dumpPath)
    if dumpPlan, exists := facts[ConfigPipelineDumpPlan].(bool); exists {
        pipeline.DumpPlan = dumpPlan
    }
    if dryRun, exists := facts[ConfigPipelineDryRun].(bool); exists {
        pipeline.DryRun = dryRun
        if dryRun {
            return nil
        }
    }
    for _, item := range pipeline.items {
        err := item.Configure(facts)
        if err != nil {
            return errors.Wrapf(err, "%s failed to configure", item.Name())
        }
    }
    for _, item := range pipeline.items {
        err := item.Initialize(pipeline.repository)
        if err != nil {
            return errors.Wrapf(err, "%s failed to initialize", item.Name())
        }
    }
    return nil
}
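
// A sketch of driving Initialize() through facts alone; the "pipeline.dag" output path is an
// arbitrary example value:
//
//	facts := map[string]interface{}{
//		ConfigPipelineDryRun:   true,           // skip Configure()/Initialize() of the items
//		ConfigPipelineDumpPlan: true,           // print the execution plan to stderr
//		ConfigPipelineDAGPath:  "pipeline.dag", // save the resolved DAG to a file
//	}
//	if err := pipeline.Initialize(facts); err != nil {
//		log.Fatal(err)
//	}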

// Run method executes the pipeline.
//
// `commits` is a slice with the git commits to analyse. Multiple branches are supported.
//
// Returns the mapping from each LeafPipelineItem to the corresponding analysis result.
// There is always a "nil" record with CommonAnalysisResult.
func (pipeline *Pipeline) Run(commits []*object.Commit) (map[LeafPipelineItem]interface{}, error) {
    startRunTime := time.Now()
    onProgress := pipeline.OnProgress
    if onProgress == nil {
        onProgress = func(int, int) {}
    }
    plan := prepareRunPlan(commits, pipeline.DumpPlan)
    progressSteps := len(plan) + 2
    branches := map[int][]PipelineItem{}
    // we will need rootClone if there is more than one root branch
    var rootClone []PipelineItem
    if !pipeline.DryRun {
        rootClone = cloneItems(pipeline.items, 1)[0]
    }
    var newestTime int64
    runTimePerItem := map[string]float64{}
    commitIndex := 0
    for index, step := range plan {
        onProgress(index+1, progressSteps)
        if pipeline.DryRun {
            continue
        }
        firstItem := step.Items[0]
        switch step.Action {
        case runActionCommit:
            state := map[string]interface{}{
                DependencyCommit: step.Commit,
                DependencyIndex:  commitIndex,
                DependencyIsMerge: (index > 0 &&
                    plan[index-1].Action == runActionCommit &&
                    plan[index-1].Commit.Hash == step.Commit.Hash) ||
                    (index < (len(plan)-1) &&
                        plan[index+1].Action == runActionCommit &&
                        plan[index+1].Commit.Hash == step.Commit.Hash),
            }
            for _, item := range branches[firstItem] {
                startTime := time.Now()
                update, err := item.Consume(state)
                runTimePerItem[item.Name()] += time.Now().Sub(startTime).Seconds()
                if err != nil {
                    log.Printf("%s failed on commit #%d (%d) %s\n",
                        item.Name(), commitIndex+1, index+1, step.Commit.Hash.String())
                    return nil, err
                }
                for _, key := range item.Provides() {
                    val, ok := update[key]
                    if !ok {
                        log.Panicf("%s: Consume() did not return %s", item.Name(), key)
                    }
                    state[key] = val
                }
            }
            commitTime := step.Commit.Committer.When.Unix()
            if commitTime > newestTime {
                newestTime = commitTime
            }
            commitIndex++
        case runActionFork:
            for i, clone := range cloneItems(branches[firstItem], len(step.Items)-1) {
                branches[step.Items[i+1]] = clone
            }
        case runActionMerge:
            merged := make([][]PipelineItem, len(step.Items))
            for i, b := range step.Items {
                merged[i] = branches[b]
            }
            mergeItems(merged)
        case runActionEmerge:
            if firstItem == rootBranchIndex {
                branches[firstItem] = pipeline.items
            } else {
                branches[firstItem] = cloneItems(rootClone, 1)[0]
            }
        case runActionDelete:
            delete(branches, firstItem)
        }
    }
    onProgress(len(plan)+1, progressSteps)
    result := map[LeafPipelineItem]interface{}{}
    if !pipeline.DryRun {
        for index, item := range getMasterBranch(branches) {
            if casted, ok := item.(LeafPipelineItem); ok {
                result[pipeline.items[index].(LeafPipelineItem)] = casted.Finalize()
            }
        }
    }
    onProgress(progressSteps, progressSteps)
    result[nil] = &CommonAnalysisResult{
        BeginTime:      plan[0].Commit.Committer.When.Unix(),
        EndTime:        newestTime,
        CommitsNumber:  len(commits),
        RunTime:        time.Since(startRunTime),
        RunTimePerItem: runTimePerItem,
    }
    return result, nil
}

// LoadCommitsFromFile reads the file by the specified FS path and generates the sequence of commits
// by interpreting each line as a Git commit hash.
func LoadCommitsFromFile(path string, repository *git.Repository) ([]*object.Commit, error) {
    var file io.ReadCloser
    if path != "-" {
        var err error
        file, err = os.Open(path)
        if err != nil {
            return nil, err
        }
        defer file.Close()
    } else {
        file = os.Stdin
    }
    scanner := bufio.NewScanner(file)
    var commits []*object.Commit
    for scanner.Scan() {
        // plumbing.Hash is a fixed-size [20]byte array, so checking len(hash) would always
        // pass; validate the length of the textual hash instead.
        if len(scanner.Text()) != 40 {
            return nil, errors.New("invalid commit hash " + scanner.Text())
        }
        hash := plumbing.NewHash(scanner.Text())
        commit, err := repository.CommitObject(hash)
        if err != nil {
            return nil, err
        }
        commits = append(commits, commit)
    }
    return commits, nil
}
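
// A short usage sketch, assuming a file with one full commit hash per line, oldest first;
// the "commits.txt" name and the `repo`/`pipeline` variables are hypothetical:
//
//	commits, err := LoadCommitsFromFile("commits.txt", repo)
//	if err != nil {
//		log.Fatal(err)
//	}
//	results, err := pipeline.Run(commits)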