package core

import (
	"bufio"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"os"
	"path/filepath"
	"runtime/debug"
	"sort"
	"strings"
	"time"

	"github.com/pkg/errors"
	"gopkg.in/src-d/go-git.v4"
	"gopkg.in/src-d/go-git.v4/plumbing"
	"gopkg.in/src-d/go-git.v4/plumbing/object"
	"gopkg.in/src-d/go-git.v4/plumbing/storer"
	"gopkg.in/src-d/hercules.v10/internal/pb"
	"gopkg.in/src-d/hercules.v10/internal/toposort"
)

// ConfigurationOptionType represents the possible types of a ConfigurationOption's value.
type ConfigurationOptionType int

const (
	// BoolConfigurationOption reflects the boolean value type.
	BoolConfigurationOption ConfigurationOptionType = iota
	// IntConfigurationOption reflects the integer value type.
	IntConfigurationOption
	// StringConfigurationOption reflects the string value type.
	StringConfigurationOption
	// FloatConfigurationOption reflects a floating point value type.
	FloatConfigurationOption
	// StringsConfigurationOption reflects the array of strings value type.
	StringsConfigurationOption
	// PathConfigurationOption reflects the file system path value type.
	PathConfigurationOption
)

// String returns an empty string for the boolean type, "int" for integers, "string" for strings
// and arrays of strings, "float" for floating point values and "path" for file system paths.
// It is used in the command line interface to show the argument's type.
func (opt ConfigurationOptionType) String() string {
	switch opt {
	case BoolConfigurationOption:
		return ""
	case IntConfigurationOption:
		return "int"
	case StringConfigurationOption:
		return "string"
	case FloatConfigurationOption:
		return "float"
	case StringsConfigurationOption:
		return "string"
	case PathConfigurationOption:
		return "path"
	}
	log.Panicf("Invalid ConfigurationOptionType value %d", opt)
	return ""
}

// ConfigurationOption allows for the unified, retrospective way to setup PipelineItem-s.
type ConfigurationOption struct {
	// Name identifies the configuration option in facts.
	Name string
	// Description represents the help text about the configuration option.
	Description string
	// Flag corresponds to the CLI token with "--" prepended.
	Flag string
	// Type specifies the kind of the configuration option's value.
	Type ConfigurationOptionType
	// Default is the initial value of the configuration option.
	Default interface{}
}

// FormatDefault converts the default value of ConfigurationOption to string.
// Used in the command line interface to show the argument's default value.
func (opt ConfigurationOption) FormatDefault() string {
	if opt.Type == StringsConfigurationOption {
		return fmt.Sprintf("\"%s\"", strings.Join(opt.Default.([]string), ","))
	}
	if opt.Type != StringConfigurationOption {
		return fmt.Sprint(opt.Default)
	}
	return fmt.Sprintf("\"%s\"", opt.Default)
}
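
// Example (an illustrative sketch, not part of the original API surface): declaring a
// hypothetical option and rendering it for the command line. The option name and the flag
// are invented for demonstration.
//
//	opt := ConfigurationOption{
//		Name:        "MyAnalysis.Granularity",
//		Description: "How many days there are in a single band.",
//		Flag:        "granularity",
//		Type:        IntConfigurationOption,
//		Default:     30,
//	}
//	// Prints: --granularity int 30
//	fmt.Printf("--%s %s %s\n", opt.Flag, opt.Type.String(), opt.FormatDefault())
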

// PipelineItem is the interface for all the units in the Git commits analysis pipeline.
type PipelineItem interface {
	// Name returns the name of the analysis.
	Name() string
	// Provides returns the list of keys of reusable calculated entities.
	// Other items may depend on them.
	Provides() []string
	// Requires returns the list of keys of needed entities which must be supplied in Consume().
	Requires() []string
	// ListConfigurationOptions returns the list of available options which can be consumed by Configure().
	ListConfigurationOptions() []ConfigurationOption
	// Configure performs the initial setup of the object by applying parameters from facts.
	// It allows creating PipelineItems in a universal way.
	Configure(facts map[string]interface{}) error
	// Initialize prepares and resets the item. Consume() requires Initialize()
	// to be called at least once beforehand.
	Initialize(*git.Repository) error
	// Consume processes the next commit.
	// deps contains the required entities which match Requires(). Besides, it always includes
	// DependencyCommit and DependencyIndex.
	// Returns the calculated entities which match Provides().
	Consume(deps map[string]interface{}) (map[string]interface{}, error)
	// Fork clones the item the requested number of times. The data links between the clones
	// are up to the implementation. Needed to handle Git branches. See also Merge().
	// Returns a slice with `n` fresh clones. In other words, it does not include the original item.
	Fork(n int) []PipelineItem
	// Merge combines several branches together. Each is supposed to have been created with Fork().
	// The result is stored in the called item, thus this function returns nothing.
	// Merge() must update all the branches, not only self. When several branches merge, some of
	// them may continue to live, hence this requirement.
	Merge(branches []PipelineItem)
}
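
// A minimal, hypothetical PipelineItem skeleton is sketched below for orientation only;
// the type name and the provided key are invented and do not ship with Hercules.
//
//	type NoopItem struct{}
//
//	func (*NoopItem) Name() string                                    { return "Noop" }
//	func (*NoopItem) Provides() []string                              { return []string{"noop"} }
//	func (*NoopItem) Requires() []string                              { return nil }
//	func (*NoopItem) ListConfigurationOptions() []ConfigurationOption { return nil }
//	func (*NoopItem) Configure(facts map[string]interface{}) error    { return nil }
//	func (*NoopItem) Initialize(*git.Repository) error                { return nil }
//	func (*NoopItem) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
//		commit := deps[DependencyCommit].(*object.Commit) // always present, see DependencyCommit
//		_ = commit                                        // a real item would analyse the commit here
//		return map[string]interface{}{"noop": nil}, nil
//	}
//	func (*NoopItem) Fork(n int) []PipelineItem {
//		clones := make([]PipelineItem, n)
//		for i := range clones {
//			clones[i] = &NoopItem{}
//		}
//		return clones
//	}
//	func (*NoopItem) Merge(branches []PipelineItem) {}
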

// FeaturedPipelineItem enables switching the automatic insertion of pipeline items on or off.
type FeaturedPipelineItem interface {
	PipelineItem
	// Features returns the list of names which enable this item to be automatically inserted
	// in Pipeline.DeployItem().
	Features() []string
}

// DisposablePipelineItem enables resources cleanup after finishing running the pipeline.
type DisposablePipelineItem interface {
	PipelineItem
	// Dispose frees any previously allocated unmanaged resources. No Consume() calls are possible
	// afterwards. The item needs to be Initialize()-d again.
	// This method is invoked once for each item in the pipeline, **in a single forked instance**.
	// Thus it is the responsibility of the item's programmer to deal with forks and merges, if
	// necessary.
	Dispose()
}

// LeafPipelineItem corresponds to the top level pipeline items which produce the end results.
type LeafPipelineItem interface {
	PipelineItem
	// Flag returns the cmdline switch to run the analysis. Should be dash-lower-case
	// without the leading dashes.
	Flag() string
	// Description returns the text which explains what the analysis is doing.
	// Should start with a capital letter and end with a dot.
	Description() string
	// Finalize returns the result of the analysis.
	Finalize() interface{}
	// Serialize encodes the object returned by Finalize() to YAML or Protocol Buffers.
	Serialize(result interface{}, binary bool, writer io.Writer) error
}

// ResultMergeablePipelineItem specifies the methods to combine several analysis results together.
type ResultMergeablePipelineItem interface {
	LeafPipelineItem
	// Deserialize loads the result from a Protocol Buffers blob.
	Deserialize(pbmessage []byte) (interface{}, error)
	// MergeResults joins two results together. Common-s are specified as the global state.
	MergeResults(r1, r2 interface{}, c1, c2 *CommonAnalysisResult) interface{}
}

// HibernateablePipelineItem is the interface to allow pipeline items to be frozen (compacted, unloaded)
// while they are not needed in the hosting branch.
type HibernateablePipelineItem interface {
	PipelineItem
	// Hibernate signals that the item is temporarily not needed and its memory can be optimized.
	Hibernate() error
	// Boot signals that the item is needed again and must be de-hibernate-d.
	Boot() error
}

// CommonAnalysisResult holds the information which is always extracted at Pipeline.Run().
type CommonAnalysisResult struct {
	// BeginTime is the time of the first commit in the analysed sequence.
	BeginTime int64
	// EndTime is the time of the last commit in the analysed sequence.
	EndTime int64
	// CommitsNumber is the number of commits in the analysed sequence.
	CommitsNumber int
	// RunTime is the duration of Pipeline.Run().
	RunTime time.Duration
	// RunTimePerItem is the time elapsed by each PipelineItem.
	RunTimePerItem map[string]float64
}

// Copy produces a deep clone of the object.
func (car CommonAnalysisResult) Copy() CommonAnalysisResult {
	result := car
	result.RunTimePerItem = map[string]float64{}
	for key, val := range car.RunTimePerItem {
		result.RunTimePerItem[key] = val
	}
	return result
}

// BeginTimeAsTime converts the UNIX timestamp of the beginning to Go time.
func (car *CommonAnalysisResult) BeginTimeAsTime() time.Time {
	return time.Unix(car.BeginTime, 0)
}

// EndTimeAsTime converts the UNIX timestamp of the ending to Go time.
func (car *CommonAnalysisResult) EndTimeAsTime() time.Time {
	return time.Unix(car.EndTime, 0)
}

// Merge combines the CommonAnalysisResult with another one.
// We choose the earlier BeginTime, the later EndTime, sum the number of commits and the
// elapsed run times.
func (car *CommonAnalysisResult) Merge(other *CommonAnalysisResult) {
	if car.EndTime == 0 || other.BeginTime == 0 {
		panic("Merging with an uninitialized CommonAnalysisResult")
	}
	if other.BeginTime < car.BeginTime {
		car.BeginTime = other.BeginTime
	}
	if other.EndTime > car.EndTime {
		car.EndTime = other.EndTime
	}
	car.CommitsNumber += other.CommitsNumber
	car.RunTime += other.RunTime
	for key, val := range other.RunTimePerItem {
		car.RunTimePerItem[key] += val
	}
}

// FillMetadata copies the data to a Protobuf message.
func (car *CommonAnalysisResult) FillMetadata(meta *pb.Metadata) *pb.Metadata {
	meta.BeginUnixTime = car.BeginTime
	meta.EndUnixTime = car.EndTime
	meta.Commits = int32(car.CommitsNumber)
	meta.RunTime = car.RunTime.Nanoseconds() / 1e6
	meta.RunTimePerItem = car.RunTimePerItem
	return meta
}

// Metadata is defined in internal/pb/pb.pb.go - header of the binary file.
type Metadata = pb.Metadata

// MetadataToCommonAnalysisResult copies the data from a Protobuf message.
func MetadataToCommonAnalysisResult(meta *Metadata) *CommonAnalysisResult {
	return &CommonAnalysisResult{
		BeginTime:      meta.BeginUnixTime,
		EndTime:        meta.EndUnixTime,
		CommitsNumber:  int(meta.Commits),
		RunTime:        time.Duration(meta.RunTime * 1e6),
		RunTimePerItem: meta.RunTimePerItem,
	}
}
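
// A small sketch (r1 and r2 stand for two previously computed results) of how the protobuf
// round-trip and Merge compose; it is illustrative rather than canonical.
//
//	meta := r1.FillMetadata(&pb.Metadata{})          // CommonAnalysisResult -> protobuf header
//	restored := MetadataToCommonAnalysisResult(meta) // protobuf header -> CommonAnalysisResult
//	restored.Merge(r2)                               // earliest BeginTime, latest EndTime, summed counters
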

// Pipeline is the core Hercules entity which carries several PipelineItems and executes them.
// See the extended example of how a Pipeline works in doc.go.
type Pipeline struct {
	// OnProgress is the callback which is invoked in Run() to report its progress.
	// The first argument is the number of completed steps, the second is the total number
	// of steps and the third is some description of the current action.
	OnProgress func(int, int, string)
	// HibernationDistance is the minimum number of actions between two sequential usages of
	// a branch to activate the hibernation optimization (cpu-memory trade-off). 0 disables.
	HibernationDistance int
	// DryRun indicates whether the items are not executed.
	DryRun bool
	// DumpPlan indicates whether to print the execution plan to stderr.
	DumpPlan bool
	// PrintActions indicates whether to print the taken actions during the execution.
	PrintActions bool
	// repository points to the analysed Git repository struct from go-git.
	repository *git.Repository
	// items are the registered building blocks in the pipeline. The order defines the
	// execution sequence.
	items []PipelineItem
	// facts is the collection of parameters used to create the items.
	facts map[string]interface{}
	// features are the flags which enable the corresponding items.
	features map[string]bool
	// l is the logger for printing output.
	l Logger
}

const (
	// ConfigPipelineDAGPath is the name of the Pipeline configuration option (Pipeline.Initialize())
	// which enables saving the items DAG to the specified file.
	ConfigPipelineDAGPath = "Pipeline.DAGPath"
	// ConfigPipelineDryRun is the name of the Pipeline configuration option (Pipeline.Initialize())
	// which disables Configure() and Initialize() invocation on each PipelineItem during the
	// Pipeline initialization.
	// Subsequent Run() calls are going to fail. Useful together with ConfigPipelineDAGPath.
	ConfigPipelineDryRun = "Pipeline.DryRun"
	// ConfigPipelineCommits is the name of the Pipeline configuration option (Pipeline.Initialize())
	// which allows specifying a custom commit sequence. By default, Pipeline.Commits() is used.
	ConfigPipelineCommits = "Pipeline.Commits"
	// ConfigPipelineDumpPlan is the name of the Pipeline configuration option (Pipeline.Initialize())
	// which outputs the execution plan to stderr.
	ConfigPipelineDumpPlan = "Pipeline.DumpPlan"
	// ConfigPipelineHibernationDistance is the name of the Pipeline configuration option (Pipeline.Initialize())
	// which is the minimum number of actions between two sequential usages of
	// a branch to activate the hibernation optimization (cpu-memory trade-off). 0 disables.
	ConfigPipelineHibernationDistance = "Pipeline.HibernationDistance"
	// ConfigPipelinePrintActions is the name of the Pipeline configuration option (Pipeline.Initialize())
	// which enables printing the taken actions of the execution plan to stderr.
	ConfigPipelinePrintActions = "Pipeline.PrintActions"
	// DependencyCommit is the name of one of the three items in `deps` supplied to PipelineItem.Consume()
	// which always exists. It corresponds to the currently analyzed commit.
	DependencyCommit = "commit"
	// DependencyIndex is the name of one of the three items in `deps` supplied to PipelineItem.Consume()
	// which always exists. It corresponds to the currently analyzed commit's index.
	DependencyIndex = "index"
	// DependencyIsMerge is the name of one of the three items in `deps` supplied to PipelineItem.Consume()
	// which always exists. It indicates whether the analyzed commit is a merge commit.
	// Checking the number of parents is not correct - we remove the back edges during the DAG simplification.
	DependencyIsMerge = "is_merge"
	// MessageFinalize is the status text reported before calling LeafPipelineItem.Finalize()-s.
	MessageFinalize = "finalize"
)

// NewPipeline initializes a new instance of Pipeline struct.
func NewPipeline(repository *git.Repository) *Pipeline {
	return &Pipeline{
		repository: repository,
		items:      []PipelineItem{},
		facts:      map[string]interface{}{},
		features:   map[string]bool{},
		l:          NewLogger(),
	}
}
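
// An end-to-end usage sketch. Everything except NewPipeline, DeployItem, Commits, Initialize
// and Run is an assumption: SomeLeafAnalysis is a stand-in for any registered LeafPipelineItem,
// the repository path is invented and error handling is elided.
//
//	repo, _ := git.PlainOpen("/path/to/repo")
//	pipeline := NewPipeline(repo)
//	leaf := pipeline.DeployItem(&SomeLeafAnalysis{}).(LeafPipelineItem)
//	commits, _ := pipeline.Commits(false)
//	facts := map[string]interface{}{ConfigPipelineCommits: commits}
//	_ = pipeline.Initialize(facts)
//	results, _ := pipeline.Run(commits)
//	fmt.Println(results[leaf], results[nil].(*CommonAnalysisResult).CommitsNumber)
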

// GetFact returns the value of the fact with the specified name.
func (pipeline *Pipeline) GetFact(name string) interface{} {
	return pipeline.facts[name]
}

// SetFact sets the value of the fact with the specified name.
func (pipeline *Pipeline) SetFact(name string, value interface{}) {
	pipeline.facts[name] = value
}

// GetFeature returns the state of the feature with the specified name (enabled/disabled) and
// whether it exists. See also: FeaturedPipelineItem.
func (pipeline *Pipeline) GetFeature(name string) (bool, bool) {
	val, exists := pipeline.features[name]
	return val, exists
}

// SetFeature sets the value of the feature with the specified name.
// See also: FeaturedPipelineItem.
func (pipeline *Pipeline) SetFeature(name string) {
	pipeline.features[name] = true
}
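
// Facts and features in a nutshell (illustrative; the fact name is made up and "uast" is only
// an example of a feature flag):
//
//	pipeline.SetFact("MyAnalysis.Granularity", 30)
//	pipeline.SetFeature("uast")
//	if enabled, exists := pipeline.GetFeature("uast"); exists && enabled {
//		// items gated on this feature become eligible for DeployItem()'s automatic insertion
//	}
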

// SetFeaturesFromFlags enables the features which were specified through the command line flags
// which belong to the given PipelineItemRegistry instance.
// See also: AddItem().
func (pipeline *Pipeline) SetFeaturesFromFlags(registry ...*PipelineItemRegistry) {
	var ffr *PipelineItemRegistry
	if len(registry) == 0 {
		ffr = Registry
	} else if len(registry) == 1 {
		ffr = registry[0]
	} else {
		panic("Zero or one registry is allowed to be passed.")
	}
	for _, feature := range ffr.featureFlags.Flags {
		pipeline.SetFeature(feature)
	}
}

// DeployItem inserts a PipelineItem into the pipeline. It also recursively creates all of its
// dependencies (PipelineItem.Requires()). Returns the same item as specified in the arguments.
func (pipeline *Pipeline) DeployItem(item PipelineItem) PipelineItem {
	fpi, ok := item.(FeaturedPipelineItem)
	if ok {
		for _, f := range fpi.Features() {
			pipeline.SetFeature(f)
		}
	}
	var queue []PipelineItem
	queue = append(queue, item)
	added := map[string]PipelineItem{}
	for _, item := range pipeline.items {
		added[item.Name()] = item
	}
	added[item.Name()] = item
	pipeline.AddItem(item)
	for len(queue) > 0 {
		head := queue[0]
		queue = queue[1:]
		for _, dep := range head.Requires() {
			for _, sibling := range Registry.Summon(dep) {
				if _, exists := added[sibling.Name()]; !exists {
					disabled := false
					// If this item supports features, check them against those activated in pipeline.features.
					if fpi, matches := sibling.(FeaturedPipelineItem); matches {
						for _, feature := range fpi.Features() {
							if !pipeline.features[feature] {
								disabled = true
								break
							}
						}
					}
					if disabled {
						continue
					}
					added[sibling.Name()] = sibling
					queue = append(queue, sibling)
					pipeline.AddItem(sibling)
				}
			}
		}
	}
	return item
}

// AddItem inserts a PipelineItem into the pipeline. It does not check any dependencies.
// See also: DeployItem().
func (pipeline *Pipeline) AddItem(item PipelineItem) PipelineItem {
	pipeline.items = append(pipeline.items, item)
	return item
}

// RemoveItem deletes a PipelineItem from the pipeline. It leaves all the rest of the items intact.
func (pipeline *Pipeline) RemoveItem(item PipelineItem) {
	for i, reg := range pipeline.items {
		if reg == item {
			pipeline.items = append(pipeline.items[:i], pipeline.items[i+1:]...)
			return
		}
	}
}

// Len returns the number of items in the pipeline.
func (pipeline *Pipeline) Len() int {
	return len(pipeline.items)
}
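
// Deployment sketch (the item types reuse the hypothetical names from the comments above):
// DeployItem() pulls in whatever Registry.Summon() finds for the transitive Requires(), while
// AddItem()/RemoveItem() only touch the explicit item list.
//
//	pipeline.DeployItem(&SomeLeafAnalysis{}) // also deploys the items providing its Requires()
//	extra := pipeline.AddItem(&NoopItem{})   // no dependency resolution
//	pipeline.RemoveItem(extra)
//	fmt.Println(pipeline.Len())
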

// Commits returns the list of commits from the history similar to `git log` over the HEAD.
// `firstParent` specifies whether to leave only the first parent after each merge
// (`git log --first-parent`) - effectively decreasing the accuracy but increasing performance.
func (pipeline *Pipeline) Commits(firstParent bool) ([]*object.Commit, error) {
	var result []*object.Commit
	repository := pipeline.repository
	head, err := repository.Head()
	if err != nil {
		if err == plumbing.ErrReferenceNotFound {
			refs, errr := repository.References()
			if errr != nil {
				return nil, errors.Wrap(errr, "unable to list the references")
			}
			refs.ForEach(func(ref *plumbing.Reference) error {
				if strings.HasPrefix(ref.Name().String(), "refs/heads/HEAD/") {
					head = ref
					return storer.ErrStop
				}
				return nil
			})
		}
		if head == nil && err != nil {
			return nil, errors.Wrap(err, "unable to collect the commit history")
		}
	}
	if firstParent {
		commit, err := repository.CommitObject(head.Hash())
		if err != nil {
			panic(err)
		}
		// the first parent matches the head
		for ; err != io.EOF; commit, err = commit.Parents().Next() {
			if err != nil {
				panic(err)
			}
			result = append(result, commit)
		}
		// reverse the order
		for i, j := 0, len(result)-1; i < j; i, j = i+1, j-1 {
			result[i], result[j] = result[j], result[i]
		}
		return result, nil
	}
	cit, err := repository.Log(&git.LogOptions{From: head.Hash()})
	if err != nil {
		return nil, errors.Wrap(err, "unable to collect the commit history")
	}
	defer cit.Close()
	cit.ForEach(func(commit *object.Commit) error {
		result = append(result, commit)
		return nil
	})
	return result, nil
}
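
// For example (a sketch; error handling elided):
//
//	commits, err := pipeline.Commits(true) // first-parent history: faster but less precise
//	if err == nil {
//		fmt.Println("commits to analyse:", len(commits))
//	}
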

// HeadCommit returns the latest commit in the repository (HEAD).
func (pipeline *Pipeline) HeadCommit() ([]*object.Commit, error) {
	repository := pipeline.repository
	headref, err := repository.Head()
	if err != nil {
		return nil, err
	}
	commit, err := repository.CommitObject(headref.Hash())
	if err != nil {
		return nil, err
	}
	return []*object.Commit{commit}, nil
}

type sortablePipelineItems []PipelineItem

func (items sortablePipelineItems) Len() int {
	return len(items)
}

func (items sortablePipelineItems) Less(i, j int) bool {
	return items[i].Name() < items[j].Name()
}

func (items sortablePipelineItems) Swap(i, j int) {
	items[i], items[j] = items[j], items[i]
}
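
// resolve builds the dependency graph of the registered items, attempts to break the known
// dependency cycles, topologically sorts the items into the execution order and optionally
// dumps the DAG to dumpPath.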
func (pipeline *Pipeline) resolve(dumpPath string) error {
	graph := toposort.NewGraph()
	sort.Sort(sortablePipelineItems(pipeline.items))
	name2item := map[string]PipelineItem{}
	ambiguousMap := map[string][]string{}
	nameUsages := map[string]int{}
	for _, item := range pipeline.items {
		nameUsages[item.Name()]++
	}
	counters := map[string]int{}
	for _, item := range pipeline.items {
		name := item.Name()
		if nameUsages[name] > 1 {
			index := counters[item.Name()] + 1
			counters[item.Name()] = index
			name = fmt.Sprintf("%s_%d", item.Name(), index)
		}
		graph.AddNode(name)
		name2item[name] = item
		for _, key := range item.Provides() {
			key = "[" + key + "]"
			graph.AddNode(key)
			if graph.AddEdge(name, key) > 1 {
				if ambiguousMap[key] != nil {
					fmt.Fprintln(os.Stderr, "Pipeline:")
					for _, item2 := range pipeline.items {
						if item2 == item {
							fmt.Fprint(os.Stderr, "> ")
						}
						fmt.Fprint(os.Stderr, item2.Name(), " [")
						for i, key2 := range item2.Provides() {
							fmt.Fprint(os.Stderr, key2)
							if i < len(item2.Provides())-1 {
								fmt.Fprint(os.Stderr, ", ")
							}
						}
						fmt.Fprintln(os.Stderr, "]")
					}
					pipeline.l.Critical("Failed to resolve pipeline dependencies: ambiguous graph.")
					return errors.New("ambiguous graph")
				}
				ambiguousMap[key] = graph.FindParents(key)
			}
		}
	}
	counters = map[string]int{}
	for _, item := range pipeline.items {
		name := item.Name()
		if nameUsages[name] > 1 {
			index := counters[item.Name()] + 1
			counters[item.Name()] = index
			name = fmt.Sprintf("%s_%d", item.Name(), index)
		}
		for _, key := range item.Requires() {
			key = "[" + key + "]"
			if graph.AddEdge(key, name) == 0 {
				pipeline.l.Criticalf("Unsatisfied dependency: %s -> %s", key, item.Name())
				return errors.New("unsatisfied dependency")
			}
		}
	}
	// Try to break the cycles in some known scenarios.
	if len(ambiguousMap) > 0 {
		var ambiguous []string
		for key := range ambiguousMap {
			ambiguous = append(ambiguous, key)
		}
		sort.Strings(ambiguous)
		bfsorder := graph.BreadthSort()
		bfsindex := map[string]int{}
		for i, s := range bfsorder {
			bfsindex[s] = i
		}
		for len(ambiguous) > 0 {
			key := ambiguous[0]
			ambiguous = ambiguous[1:]
			pair := ambiguousMap[key]
			inheritor := pair[1]
			if bfsindex[pair[1]] < bfsindex[pair[0]] {
				inheritor = pair[0]
			}
			removed := graph.RemoveEdge(key, inheritor)
			cycle := map[string]bool{}
			for _, node := range graph.FindCycle(key) {
				cycle[node] = true
			}
			if len(cycle) == 0 {
				cycle[inheritor] = true
			}
			if removed {
				graph.AddEdge(key, inheritor)
			}
			graph.RemoveEdge(inheritor, key)
			graph.ReindexNode(inheritor)
			// for all nodes key links to except those in cycle, put the link from inheritor
			for _, node := range graph.FindChildren(key) {
				if _, exists := cycle[node]; !exists {
					graph.AddEdge(inheritor, node)
					graph.RemoveEdge(key, node)
				}
			}
			graph.ReindexNode(key)
		}
	}
	var graphCopy *toposort.Graph
	if dumpPath != "" {
		graphCopy = graph.Copy()
	}
	strplan, ok := graph.Toposort()
	if !ok {
		pipeline.l.Critical("Failed to resolve pipeline dependencies: unable to topologically sort the items.")
		return errors.New("topological sort failure")
	}
	pipeline.items = make([]PipelineItem, 0, len(pipeline.items))
	for _, key := range strplan {
		if item, ok := name2item[key]; ok {
			pipeline.items = append(pipeline.items, item)
		}
	}
	if dumpPath != "" {
		// If there is a floating difference, uncomment this:
		// fmt.Fprint(os.Stderr, graphCopy.DebugDump())
		ioutil.WriteFile(dumpPath, []byte(graphCopy.Serialize(strplan)), 0666)
		absPath, _ := filepath.Abs(dumpPath)
		pipeline.l.Infof("Wrote the DAG to %s\n", absPath)
	}
	return nil
}

// Initialize prepares the pipeline for the execution (Run()). This function
// resolves the execution DAG, Configure()-s and Initialize()-s the items in it in the
// topological dependency order. `facts` are passed inside Configure(). They are mutable.
func (pipeline *Pipeline) Initialize(facts map[string]interface{}) error {
	cleanReturn := false
	defer func() {
		if !cleanReturn {
			remotes, _ := pipeline.repository.Remotes()
			if len(remotes) > 0 {
				pipeline.l.Errorf("Failed to initialize the pipeline on %s", remotes[0].Config().URLs)
			}
		}
	}()
	if facts == nil {
		facts = map[string]interface{}{}
	}
	// take the logger from the facts; otherwise, store the pipeline's logger as a fact
	// so that it is used by all the analysis tasks
	if l, exists := facts[ConfigLogger].(Logger); exists {
		pipeline.l = l
	} else {
		facts[ConfigLogger] = pipeline.l
	}
	if _, exists := facts[ConfigPipelineCommits]; !exists {
		var err error
		facts[ConfigPipelineCommits], err = pipeline.Commits(false)
		if err != nil {
			pipeline.l.Errorf("failed to list the commits: %v", err)
			return err
		}
	}
	pipeline.PrintActions, _ = facts[ConfigPipelinePrintActions].(bool)
	if val, exists := facts[ConfigPipelineHibernationDistance].(int); exists {
		if val < 0 {
			err := fmt.Errorf("--hibernation-distance cannot be negative (got %d)", val)
			pipeline.l.Error(err)
			return err
		}
		pipeline.HibernationDistance = val
	}
	dumpPath, _ := facts[ConfigPipelineDAGPath].(string)
	err := pipeline.resolve(dumpPath)
	if err != nil {
		return err
	}
	if dumpPlan, exists := facts[ConfigPipelineDumpPlan].(bool); exists {
		pipeline.DumpPlan = dumpPlan
	}
	if dryRun, exists := facts[ConfigPipelineDryRun].(bool); exists {
		pipeline.DryRun = dryRun
		if dryRun {
			cleanReturn = true
			return nil
		}
	}
	for _, item := range pipeline.items {
		err := item.Configure(facts)
		if err != nil {
			cleanReturn = true
			return errors.Wrapf(err, "%s failed to configure", item.Name())
		}
	}
	for _, item := range pipeline.items {
		err := item.Initialize(pipeline.repository)
		if err != nil {
			cleanReturn = true
			return errors.Wrapf(err, "%s failed to initialize", item.Name())
		}
	}
	if pipeline.HibernationDistance > 0 {
		// if we want hibernation, then we want to minimize RSS
		debug.SetGCPercent(20) // the default is 100
	}
	cleanReturn = true
	return nil
}
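
// A dry-run sketch that only materializes the DAG (the dump path is an assumption; nothing is
// Configure()-d or Initialize()-d, so a subsequent Run() would not work):
//
//	facts := map[string]interface{}{
//		ConfigPipelineDAGPath: "/tmp/hercules-dag.txt",
//		ConfigPipelineDryRun:  true,
//	}
//	if err := pipeline.Initialize(facts); err != nil {
//		log.Fatal(err)
//	}
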

// Run method executes the pipeline.
//
// `commits` is a slice with the git commits to analyse. Multiple branches are supported.
//
// Returns the mapping from each LeafPipelineItem to the corresponding analysis result.
// There is always a "nil" record with CommonAnalysisResult.
func (pipeline *Pipeline) Run(commits []*object.Commit) (map[LeafPipelineItem]interface{}, error) {
	startRunTime := time.Now()
	cleanReturn := false
	defer func() {
		if !cleanReturn {
			remotes, _ := pipeline.repository.Remotes()
			if len(remotes) > 0 {
				pipeline.l.Errorf("Failed to run the pipeline on %s", remotes[0].Config().URLs)
			}
		}
	}()
	onProgress := pipeline.OnProgress
	if onProgress == nil {
		onProgress = func(int, int, string) {}
	}
	plan := prepareRunPlan(commits, pipeline.HibernationDistance, pipeline.DumpPlan)
	progressSteps := len(plan) + 2
	branches := map[int][]PipelineItem{}
	// we will need rootClone if there is more than one root branch
	var rootClone []PipelineItem
	if !pipeline.DryRun {
		rootClone = cloneItems(pipeline.items, 1)[0]
	}
	var newestTime int64
	runTimePerItem := map[string]float64{}
	isMerge := func(index int, commit plumbing.Hash) bool {
		match := false
		// look for the same hash backward
		for i := index - 1; i > 0; i-- {
			switch plan[i].Action {
			case runActionHibernate, runActionBoot:
				continue
			case runActionCommit:
				match = plan[i].Commit.Hash == commit
				fallthrough
			default:
				i = 0
			}
		}
		if match {
			return true
		}
		// look for the same hash forward
		for i := index + 1; i < len(plan); i++ {
			switch plan[i].Action {
			case runActionHibernate, runActionBoot:
				continue
			case runActionCommit:
				match = plan[i].Commit.Hash == commit
				fallthrough
			default:
				i = len(plan)
			}
		}
		return match
	}
	commitIndex := 0
	for index, step := range plan {
		onProgress(index+1, progressSteps, step.String())
		if pipeline.DryRun {
			continue
		}
		if pipeline.PrintActions {
			printAction(step)
		}
		if index > 0 && index%100 == 0 && pipeline.HibernationDistance > 0 {
			debug.FreeOSMemory()
		}
		firstItem := step.Items[0]
		switch step.Action {
		case runActionCommit:
			state := map[string]interface{}{
				DependencyCommit:  step.Commit,
				DependencyIndex:   commitIndex,
				DependencyIsMerge: isMerge(index, step.Commit.Hash),
			}
			for _, item := range branches[firstItem] {
				startTime := time.Now()
				update, err := item.Consume(state)
				runTimePerItem[item.Name()] += time.Now().Sub(startTime).Seconds()
				if err != nil {
					pipeline.l.Errorf("%s failed on commit #%d (%d) %s: %v\n",
						item.Name(), commitIndex+1, index+1, step.Commit.Hash.String(), err)
					return nil, err
				}
				for _, key := range item.Provides() {
					val, ok := update[key]
					if !ok {
						err := fmt.Errorf("%s: Consume() did not return %s", item.Name(), key)
						pipeline.l.Critical(err)
						return nil, err
					}
					state[key] = val
				}
			}
			commitTime := step.Commit.Committer.When.Unix()
			if commitTime > newestTime {
				newestTime = commitTime
			}
			commitIndex++
		case runActionFork:
			startTime := time.Now()
			for i, clone := range cloneItems(branches[firstItem], len(step.Items)-1) {
				branches[step.Items[i+1]] = clone
			}
			runTimePerItem["*.Fork"] += time.Now().Sub(startTime).Seconds()
		case runActionMerge:
			startTime := time.Now()
			merged := make([][]PipelineItem, len(step.Items))
			for i, b := range step.Items {
				merged[i] = branches[b]
			}
			mergeItems(merged)
			runTimePerItem["*.Merge"] += time.Now().Sub(startTime).Seconds()
		case runActionEmerge:
			if firstItem == rootBranchIndex {
				branches[firstItem] = pipeline.items
			} else {
				branches[firstItem] = cloneItems(rootClone, 1)[0]
			}
		case runActionDelete:
			delete(branches, firstItem)
		case runActionHibernate:
			for _, branch := range step.Items {
				for _, item := range branches[branch] {
					if hi, ok := item.(HibernateablePipelineItem); ok {
						startTime := time.Now()
						err := hi.Hibernate()
						if err != nil {
							pipeline.l.Errorf("Failed to hibernate %s: %v\n", item.Name(), err)
							return nil, err
						}
						runTimePerItem[item.Name()+".Hibernation"] += time.Now().Sub(startTime).Seconds()
					}
				}
			}
		case runActionBoot:
			for _, branch := range step.Items {
				for _, item := range branches[branch] {
					if hi, ok := item.(HibernateablePipelineItem); ok {
						startTime := time.Now()
						err := hi.Boot()
						if err != nil {
							pipeline.l.Errorf("Failed to boot %s: %v\n", item.Name(), err)
							return nil, err
						}
						runTimePerItem[item.Name()+".Hibernation"] += time.Now().Sub(startTime).Seconds()
					}
				}
			}
		}
	}
	onProgress(len(plan)+1, progressSteps, MessageFinalize)
	result := map[LeafPipelineItem]interface{}{}
	if !pipeline.DryRun {
		for index, item := range getMasterBranch(branches) {
			if casted, ok := item.(DisposablePipelineItem); ok {
				casted.Dispose()
			}
			if casted, ok := item.(LeafPipelineItem); ok {
				result[pipeline.items[index].(LeafPipelineItem)] = casted.Finalize()
			}
		}
	}
	onProgress(progressSteps, progressSteps, "")
	result[nil] = &CommonAnalysisResult{
		BeginTime:      plan[0].Commit.Committer.When.Unix(),
		EndTime:        newestTime,
		CommitsNumber:  len(commits),
		RunTime:        time.Since(startRunTime),
		RunTimePerItem: runTimePerItem,
	}
	cleanReturn = true
	return result, nil
}
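
// Consuming the results of Run() (a sketch; `leaf` stands for the LeafPipelineItem previously
// returned by DeployItem, and error handling is abbreviated):
//
//	results, err := pipeline.Run(commits)
//	if err != nil {
//		log.Fatal(err)
//	}
//	common := results[nil].(*CommonAnalysisResult)
//	fmt.Println("commits:", common.CommitsNumber, "wall time:", common.RunTime)
//	_ = leaf.Serialize(results[leaf], false, os.Stdout) // YAML when binary is false
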

// LoadCommitsFromFile reads the file by the specified FS path and generates the sequence of commits
// by interpreting each line as a Git commit hash.
func LoadCommitsFromFile(path string, repository *git.Repository) ([]*object.Commit, error) {
	var file io.ReadCloser
	if path != "-" {
		var err error
		file, err = os.Open(path)
		if err != nil {
			return nil, err
		}
		defer file.Close()
	} else {
		file = os.Stdin
	}
	scanner := bufio.NewScanner(file)
	var commits []*object.Commit
	for scanner.Scan() {
		hash := plumbing.NewHash(scanner.Text())
		if hash.IsZero() {
			// a zero hash means the line could not be parsed as a hexadecimal SHA-1
			return nil, errors.New("invalid commit hash " + scanner.Text())
		}
		commit, err := repository.CommitObject(hash)
		if err != nil {
			return nil, err
		}
		commits = append(commits, commit)
	}
	return commits, nil
}
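
// Sketch: feeding an explicit commit list (one hash per line in a hypothetical "commits.txt")
// into Run() instead of the default Pipeline.Commits() history.
//
//	commits, err := LoadCommitsFromFile("commits.txt", repo)
//	if err != nil {
//		log.Fatal(err)
//	}
//	results, _ := pipeline.Run(commits)
//	fmt.Println(len(results))
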

// GetSensibleRemote extracts a remote URL of the repository to identify it.
func GetSensibleRemote(repository *git.Repository) string {
	if r, err := repository.Remotes(); err == nil && len(r) > 0 {
		return r[0].Config().URLs[0]
	}
	return "<no remote>"
}