pipeline.go 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178
  1. package core
  2. import (
  3. "bufio"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "io/ioutil"
  8. "log"
  9. "os"
  10. "path/filepath"
  11. "sort"
  12. "strings"
  13. "time"
  14. "gopkg.in/src-d/go-git.v4"
  15. "gopkg.in/src-d/go-git.v4/plumbing"
  16. "gopkg.in/src-d/go-git.v4/plumbing/object"
  17. "gopkg.in/src-d/hercules.v4/internal/pb"
  18. "gopkg.in/src-d/hercules.v4/internal/toposort"
  19. )
  20. // ConfigurationOptionType represents the possible types of a ConfigurationOption's value.
  21. type ConfigurationOptionType int
  22. const (
  23. // BoolConfigurationOption reflects the boolean value type.
  24. BoolConfigurationOption ConfigurationOptionType = iota
  25. // IntConfigurationOption reflects the integer value type.
  26. IntConfigurationOption
  27. // StringConfigurationOption reflects the string value type.
  28. StringConfigurationOption
  29. // FloatConfigurationOption reflects a floating point value type.
  30. FloatConfigurationOption
  31. // StringsConfigurationOption reflects the array of strings value type.
  32. StringsConfigurationOption
  33. )
  34. // String() returns an empty string for the boolean type, "int" for integers and "string" for
  35. // strings. It is used in the command line interface to show the argument's type.
  36. func (opt ConfigurationOptionType) String() string {
  37. switch opt {
  38. case BoolConfigurationOption:
  39. return ""
  40. case IntConfigurationOption:
  41. return "int"
  42. case StringConfigurationOption:
  43. return "string"
  44. case FloatConfigurationOption:
  45. return "float"
  46. case StringsConfigurationOption:
  47. return "string"
  48. }
  49. panic(fmt.Sprintf("Invalid ConfigurationOptionType value %d", opt))
  50. }
  51. // ConfigurationOption allows for the unified, retrospective way to setup PipelineItem-s.
  52. type ConfigurationOption struct {
  53. // Name identifies the configuration option in facts.
  54. Name string
  55. // Description represents the help text about the configuration option.
  56. Description string
  57. // Flag corresponds to the CLI token with "--" prepended.
  58. Flag string
  59. // Type specifies the kind of the configuration option's value.
  60. Type ConfigurationOptionType
  61. // Default is the initial value of the configuration option.
  62. Default interface{}
  63. }
  64. // FormatDefault converts the default value of ConfigurationOption to string.
  65. // Used in the command line interface to show the argument's default value.
  66. func (opt ConfigurationOption) FormatDefault() string {
  67. if opt.Type == StringsConfigurationOption {
  68. return fmt.Sprintf("\"%s\"", strings.Join(opt.Default.([]string), ","))
  69. }
  70. if opt.Type != StringConfigurationOption {
  71. return fmt.Sprint(opt.Default)
  72. }
  73. return fmt.Sprintf("\"%s\"", opt.Default)
  74. }
// PipelineItem is the interface for all the units in the Git commits analysis pipeline.
// Items are wired together by matching Provides() keys against Requires() keys;
// the Pipeline topologically sorts them and drives the
// Configure -> Initialize -> Consume... -> (Fork/Merge) life cycle.
type PipelineItem interface {
	// Name returns the name of the analysis.
	Name() string
	// Provides returns the list of keys of reusable calculated entities.
	// Other items may depend on them.
	Provides() []string
	// Requires returns the list of keys of needed entities which must be supplied in Consume().
	Requires() []string
	// ListConfigurationOptions returns the list of available options which can be consumed by Configure().
	ListConfigurationOptions() []ConfigurationOption
	// Configure performs the initial setup of the object by applying parameters from facts.
	// It allows to create PipelineItems in a universal way.
	Configure(facts map[string]interface{})
	// Initialize prepares and resets the item. Consume() requires Initialize()
	// to be called at least once beforehand.
	Initialize(*git.Repository)
	// Consume processes the next commit.
	// deps contains the required entities which match Depends(). Besides, it always includes
	// DependencyCommit and DependencyIndex.
	// Returns the calculated entities which match Provides().
	Consume(deps map[string]interface{}) (map[string]interface{}, error)
	// Fork clones the item the requested number of times. The data links between the clones
	// are up to the implementation. Needed to handle Git branches. See also Merge().
	// Returns a slice with `n` fresh clones. In other words, it does not include the original item.
	Fork(n int) []PipelineItem
	// Merge combines several branches together. Each is supposed to have been created with Fork().
	// The result is stored in the called item, thus this function returns nothing.
	// Merge() must update all the branches, not only self. When several branches merge, some of
	// them may continue to live, hence this requirement.
	Merge(branches []PipelineItem)
}
// FeaturedPipelineItem enables switching the automatic insertion of pipeline items on or off.
type FeaturedPipelineItem interface {
	PipelineItem
	// Features returns the list of names which enable this item to be automatically inserted
	// in Pipeline.DeployItem().
	Features() []string
}

// LeafPipelineItem corresponds to the top level pipeline items which produce the end results.
type LeafPipelineItem interface {
	PipelineItem
	// Flag returns the cmdline name of the item.
	Flag() string
	// Finalize returns the result of the analysis.
	Finalize() interface{}
	// Serialize encodes the object returned by Finalize() to YAML or Protocol Buffers.
	Serialize(result interface{}, binary bool, writer io.Writer) error
}

// MergeablePipelineItem specifies the methods to combine several analysis results together.
type MergeablePipelineItem interface {
	LeafPipelineItem
	// Deserialize loads the result from Protocol Buffers blob.
	Deserialize(pbmessage []byte) (interface{}, error)
	// MergeResults joins two results together. Common-s are specified as the global state.
	MergeResults(r1, r2 interface{}, c1, c2 *CommonAnalysisResult) interface{}
}
  132. // CommonAnalysisResult holds the information which is always extracted at Pipeline.Run().
  133. type CommonAnalysisResult struct {
  134. // Time of the first commit in the analysed sequence.
  135. BeginTime int64
  136. // Time of the last commit in the analysed sequence.
  137. EndTime int64
  138. // The number of commits in the analysed sequence.
  139. CommitsNumber int
  140. // The duration of Pipeline.Run().
  141. RunTime time.Duration
  142. }
  143. // BeginTimeAsTime converts the UNIX timestamp of the beginning to Go time.
  144. func (car *CommonAnalysisResult) BeginTimeAsTime() time.Time {
  145. return time.Unix(car.BeginTime, 0)
  146. }
  147. // EndTimeAsTime converts the UNIX timestamp of the ending to Go time.
  148. func (car *CommonAnalysisResult) EndTimeAsTime() time.Time {
  149. return time.Unix(car.EndTime, 0)
  150. }
  151. // Merge combines the CommonAnalysisResult with an other one.
  152. // We choose the earlier BeginTime, the later EndTime, sum the number of commits and the
  153. // elapsed run times.
  154. func (car *CommonAnalysisResult) Merge(other *CommonAnalysisResult) {
  155. if car.EndTime == 0 || other.BeginTime == 0 {
  156. panic("Merging with an uninitialized CommonAnalysisResult")
  157. }
  158. if other.BeginTime < car.BeginTime {
  159. car.BeginTime = other.BeginTime
  160. }
  161. if other.EndTime > car.EndTime {
  162. car.EndTime = other.EndTime
  163. }
  164. car.CommitsNumber += other.CommitsNumber
  165. car.RunTime += other.RunTime
  166. }
  167. // FillMetadata copies the data to a Protobuf message.
  168. func (car *CommonAnalysisResult) FillMetadata(meta *pb.Metadata) *pb.Metadata {
  169. meta.BeginUnixTime = car.BeginTime
  170. meta.EndUnixTime = car.EndTime
  171. meta.Commits = int32(car.CommitsNumber)
  172. meta.RunTime = car.RunTime.Nanoseconds() / 1e6
  173. return meta
  174. }
  175. // Metadata is defined in internal/pb/pb.pb.go - header of the binary file.
  176. type Metadata = pb.Metadata
  177. // MetadataToCommonAnalysisResult copies the data from a Protobuf message.
  178. func MetadataToCommonAnalysisResult(meta *Metadata) *CommonAnalysisResult {
  179. return &CommonAnalysisResult{
  180. BeginTime: meta.BeginUnixTime,
  181. EndTime: meta.EndUnixTime,
  182. CommitsNumber: int(meta.Commits),
  183. RunTime: time.Duration(meta.RunTime * 1e6),
  184. }
  185. }
// Pipeline is the core Hercules entity which carries several PipelineItems and executes them.
// See the extended example of how a Pipeline works in doc.go
type Pipeline struct {
	// OnProgress is the callback which is invoked in Analyse() to output its
	// progress. The first argument is the number of complete steps and the
	// second is the total number of steps.
	OnProgress func(int, int)
	// repository points to the analysed Git repository struct from go-git.
	repository *git.Repository
	// items are the registered building blocks in the pipeline. The order defines the
	// execution sequence; it is rewritten by resolve() into topological order.
	items []PipelineItem
	// facts is the collection of parameters to create items; passed to Configure().
	facts map[string]interface{}
	// features holds the flags which enable the corresponding optional items
	// in DeployItem().
	features map[string]bool
}
const (
	// ConfigPipelineDumpPath is the name of the Pipeline configuration option (Pipeline.Initialize())
	// which enables saving the items DAG to the specified file.
	ConfigPipelineDumpPath = "Pipeline.DumpPath"
	// ConfigPipelineDryRun is the name of the Pipeline configuration option (Pipeline.Initialize())
	// which disables Configure() and Initialize() invocation on each PipelineItem during the
	// Pipeline initialization.
	// Subsequent Run() calls are going to fail. Useful with ConfigPipelineDumpPath=true.
	ConfigPipelineDryRun = "Pipeline.DryRun"
	// ConfigPipelineCommits is the name of the Pipeline configuration option (Pipeline.Initialize())
	// which allows to specify the custom commit sequence. By default, Pipeline.Commits() is used.
	ConfigPipelineCommits = "commits"
	// DependencyCommit is the name of one of the two items in `deps` supplied to PipelineItem.Consume()
	// which always exists. It corresponds to the currently analyzed commit.
	DependencyCommit = "commit"
	// DependencyIndex is the name of one of the two items in `deps` supplied to PipelineItem.Consume()
	// which always exists. It corresponds to the currently analyzed commit's index.
	DependencyIndex = "index"
)
  222. // NewPipeline initializes a new instance of Pipeline struct.
  223. func NewPipeline(repository *git.Repository) *Pipeline {
  224. return &Pipeline{
  225. repository: repository,
  226. items: []PipelineItem{},
  227. facts: map[string]interface{}{},
  228. features: map[string]bool{},
  229. }
  230. }
  231. // GetFact returns the value of the fact with the specified name.
  232. func (pipeline *Pipeline) GetFact(name string) interface{} {
  233. return pipeline.facts[name]
  234. }
  235. // SetFact sets the value of the fact with the specified name.
  236. func (pipeline *Pipeline) SetFact(name string, value interface{}) {
  237. pipeline.facts[name] = value
  238. }
  239. // GetFeature returns the state of the feature with the specified name (enabled/disabled) and
  240. // whether it exists. See also: FeaturedPipelineItem.
  241. func (pipeline *Pipeline) GetFeature(name string) (bool, bool) {
  242. val, exists := pipeline.features[name]
  243. return val, exists
  244. }
  245. // SetFeature sets the value of the feature with the specified name.
  246. // See also: FeaturedPipelineItem.
  247. func (pipeline *Pipeline) SetFeature(name string) {
  248. pipeline.features[name] = true
  249. }
  250. // SetFeaturesFromFlags enables the features which were specified through the command line flags
  251. // which belong to the given PipelineItemRegistry instance.
  252. // See also: AddItem().
  253. func (pipeline *Pipeline) SetFeaturesFromFlags(registry ...*PipelineItemRegistry) {
  254. var ffr *PipelineItemRegistry
  255. if len(registry) == 0 {
  256. ffr = Registry
  257. } else if len(registry) == 1 {
  258. ffr = registry[0]
  259. } else {
  260. panic("Zero or one registry is allowed to be passed.")
  261. }
  262. for _, feature := range ffr.featureFlags.Flags {
  263. pipeline.SetFeature(feature)
  264. }
  265. }
// DeployItem inserts a PipelineItem into the pipeline. It also recursively creates all of its
// dependencies (PipelineItem.Requires()). Returns the same item as specified in the arguments.
func (pipeline *Pipeline) DeployItem(item PipelineItem) PipelineItem {
	// Enable the features the deployed item declares, so that its own
	// feature-gated dependencies can be deployed below.
	fpi, ok := item.(FeaturedPipelineItem)
	if ok {
		for _, f := range fpi.Features() {
			pipeline.SetFeature(f)
		}
	}
	// Breadth-first walk over the dependency graph. `queue` holds items whose
	// requirements have not been examined yet; `added` (keyed by Name()) ensures
	// each item is deployed at most once, seeded with the already present items.
	queue := []PipelineItem{}
	queue = append(queue, item)
	added := map[string]PipelineItem{}
	for _, item := range pipeline.items {
		added[item.Name()] = item
	}
	added[item.Name()] = item
	pipeline.AddItem(item)
	for len(queue) > 0 {
		head := queue[0]
		queue = queue[1:]
		for _, dep := range head.Requires() {
			// Summon() returns every registered item which provides `dep`.
			for _, sibling := range Registry.Summon(dep) {
				if _, exists := added[sibling.Name()]; !exists {
					disabled := false
					// If this item supports features, check them against the activated in pipeline.features
					if fpi, matches := sibling.(FeaturedPipelineItem); matches {
						for _, feature := range fpi.Features() {
							if !pipeline.features[feature] {
								// all of the candidate's features must be enabled
								disabled = true
								break
							}
						}
					}
					if disabled {
						continue
					}
					added[sibling.Name()] = sibling
					queue = append(queue, sibling)
					pipeline.AddItem(sibling)
				}
			}
		}
	}
	return item
}
  311. // AddItem inserts a PipelineItem into the pipeline. It does not check any dependencies.
  312. // See also: DeployItem().
  313. func (pipeline *Pipeline) AddItem(item PipelineItem) PipelineItem {
  314. pipeline.items = append(pipeline.items, item)
  315. return item
  316. }
  317. // RemoveItem deletes a PipelineItem from the pipeline. It leaves all the rest of the items intact.
  318. func (pipeline *Pipeline) RemoveItem(item PipelineItem) {
  319. for i, reg := range pipeline.items {
  320. if reg == item {
  321. pipeline.items = append(pipeline.items[:i], pipeline.items[i+1:]...)
  322. return
  323. }
  324. }
  325. }
  326. // Len returns the number of items in the pipeline.
  327. func (pipeline *Pipeline) Len() int {
  328. return len(pipeline.items)
  329. }
// Commits returns the critical path in the repository's history. It starts
// from HEAD and traces commits backwards till the root. When it encounters
// a merge (more than one parent), it always chooses the first parent.
// Panics if the repository has no HEAD or a commit cannot be loaded.
func (pipeline *Pipeline) Commits() []*object.Commit {
	result := []*object.Commit{}
	repository := pipeline.repository
	head, err := repository.Head()
	if err != nil {
		panic(err)
	}
	commit, err := repository.CommitObject(head.Hash())
	if err != nil {
		panic(err)
	}
	// the first parent matches the head
	// Parents().Next() yields the first parent each iteration, so only the
	// first-parent chain is followed; err becomes io.EOF at the root commit.
	for ; err != io.EOF; commit, err = commit.Parents().Next() {
		if err != nil {
			panic(err)
		}
		result = append(result, commit)
	}
	// reverse the order
	// The walk produced newest-to-oldest; flip in place to chronological order.
	for i, j := 0, len(result)-1; i < j; i, j = i+1, j-1 {
		result[i], result[j] = result[j], result[i]
	}
	return result
}
  357. type sortablePipelineItems []PipelineItem
  358. func (items sortablePipelineItems) Len() int {
  359. return len(items)
  360. }
  361. func (items sortablePipelineItems) Less(i, j int) bool {
  362. return items[i].Name() < items[j].Name()
  363. }
  364. func (items sortablePipelineItems) Swap(i, j int) {
  365. items[i], items[j] = items[j], items[i]
  366. }
// resolve builds the dependency DAG of the registered items, disambiguates
// duplicate providers, breaks known cycle patterns and rewrites pipeline.items
// in topological order. When dumpPath is not empty, the DAG is also serialized
// to that file. Panics when the dependencies cannot be satisfied or sorted.
func (pipeline *Pipeline) resolve(dumpPath string) {
	graph := toposort.NewGraph()
	// Sort by name first so that the resolution is deterministic.
	sort.Sort(sortablePipelineItems(pipeline.items))
	name2item := map[string]PipelineItem{}
	// ambiguousMap: "[entity]" node -> the two provider node names which both supply it.
	ambiguousMap := map[string][]string{}
	nameUsages := map[string]int{}
	for _, item := range pipeline.items {
		nameUsages[item.Name()]++
	}
	counters := map[string]int{}
	// First pass: add one node per item (suffixing "_N" when the same item type
	// appears several times) and one "[entity]" node per provided key.
	for _, item := range pipeline.items {
		name := item.Name()
		if nameUsages[name] > 1 {
			index := counters[item.Name()] + 1
			counters[item.Name()] = index
			name = fmt.Sprintf("%s_%d", item.Name(), index)
		}
		graph.AddNode(name)
		name2item[name] = item
		for _, key := range item.Provides() {
			key = "[" + key + "]"
			graph.AddNode(key)
			if graph.AddEdge(name, key) > 1 {
				// A second provider for the same entity. One ambiguity is tolerated
				// (recorded for the cycle-breaking phase); a third provider is fatal.
				if ambiguousMap[key] != nil {
					fmt.Fprintln(os.Stderr, "Pipeline:")
					for _, item2 := range pipeline.items {
						if item2 == item {
							fmt.Fprint(os.Stderr, "> ")
						}
						fmt.Fprint(os.Stderr, item2.Name(), " [")
						for i, key2 := range item2.Provides() {
							fmt.Fprint(os.Stderr, key2)
							// NOTE(review): this separator check uses item.Provides() while
							// iterating item2.Provides(); looks like it should be item2 — verify.
							if i < len(item.Provides())-1 {
								fmt.Fprint(os.Stderr, ", ")
							}
						}
						fmt.Fprintln(os.Stderr, "]")
					}
					panic("Failed to resolve pipeline dependencies: ambiguous graph.")
				}
				ambiguousMap[key] = graph.FindParents(key)
			}
		}
	}
	counters = map[string]int{}
	// Second pass: link every required "[entity]" node to its consumer, using the
	// same "_N" suffixing scheme so names match the first pass.
	for _, item := range pipeline.items {
		name := item.Name()
		if nameUsages[name] > 1 {
			index := counters[item.Name()] + 1
			counters[item.Name()] = index
			name = fmt.Sprintf("%s_%d", item.Name(), index)
		}
		for _, key := range item.Requires() {
			key = "[" + key + "]"
			// AddEdge() returning 0 means the "[entity]" node was never provided.
			if graph.AddEdge(key, name) == 0 {
				panic(fmt.Sprintf("Unsatisfied dependency: %s -> %s", key, item.Name()))
			}
		}
	}
	// Try to break the cycles in some known scenarios.
	if len(ambiguousMap) > 0 {
		ambiguous := []string{}
		for key := range ambiguousMap {
			ambiguous = append(ambiguous, key)
		}
		// Sorted for determinism; BFS order decides which provider "inherits".
		sort.Strings(ambiguous)
		bfsorder := graph.BreadthSort()
		bfsindex := map[string]int{}
		for i, s := range bfsorder {
			bfsindex[s] = i
		}
		for len(ambiguous) > 0 {
			key := ambiguous[0]
			ambiguous = ambiguous[1:]
			// pair holds the two providers; the one later in BFS order inherits.
			// NOTE(review): assumes FindParents() returned exactly two parents.
			pair := ambiguousMap[key]
			inheritor := pair[1]
			if bfsindex[pair[1]] < bfsindex[pair[0]] {
				inheritor = pair[0]
			}
			// Temporarily drop the key->inheritor edge to find the cycle through key.
			removed := graph.RemoveEdge(key, inheritor)
			cycle := map[string]bool{}
			for _, node := range graph.FindCycle(key) {
				cycle[node] = true
			}
			if len(cycle) == 0 {
				cycle[inheritor] = true
			}
			if removed {
				graph.AddEdge(key, inheritor)
			}
			graph.RemoveEdge(inheritor, key)
			graph.ReindexNode(inheritor)
			// for all nodes key links to except those in cycle, put the link from inheritor
			for _, node := range graph.FindChildren(key) {
				if _, exists := cycle[node]; !exists {
					graph.AddEdge(inheritor, node)
					graph.RemoveEdge(key, node)
				}
			}
			graph.ReindexNode(key)
		}
	}
	var graphCopy *toposort.Graph
	if dumpPath != "" {
		// Keep a pristine copy: Toposort() mutates the graph.
		graphCopy = graph.Copy()
	}
	strplan, ok := graph.Toposort()
	if !ok {
		panic("Failed to resolve pipeline dependencies: unable to topologically sort the items.")
	}
	// Rebuild pipeline.items in topological order, skipping the "[entity]" nodes.
	pipeline.items = make([]PipelineItem, 0, len(pipeline.items))
	for _, key := range strplan {
		if item, ok := name2item[key]; ok {
			pipeline.items = append(pipeline.items, item)
		}
	}
	if dumpPath != "" {
		// If there is a floating difference, uncomment this:
		// fmt.Fprint(os.Stderr, graphCopy.DebugDump())
		// NOTE(review): the WriteFile error is silently ignored — consider logging it.
		ioutil.WriteFile(dumpPath, []byte(graphCopy.Serialize(strplan)), 0666)
		absPath, _ := filepath.Abs(dumpPath)
		log.Printf("Wrote the DAG to %s\n", absPath)
	}
}
  491. // Initialize prepares the pipeline for the execution (Run()). This function
  492. // resolves the execution DAG, Configure()-s and Initialize()-s the items in it in the
  493. // topological dependency order. `facts` are passed inside Configure(). They are mutable.
  494. func (pipeline *Pipeline) Initialize(facts map[string]interface{}) {
  495. if facts == nil {
  496. facts = map[string]interface{}{}
  497. }
  498. if _, exists := facts[ConfigPipelineCommits]; !exists {
  499. facts[ConfigPipelineCommits] = pipeline.Commits()
  500. }
  501. dumpPath, _ := facts[ConfigPipelineDumpPath].(string)
  502. pipeline.resolve(dumpPath)
  503. if dryRun, _ := facts[ConfigPipelineDryRun].(bool); dryRun {
  504. return
  505. }
  506. for _, item := range pipeline.items {
  507. item.Configure(facts)
  508. }
  509. for _, item := range pipeline.items {
  510. item.Initialize(pipeline.repository)
  511. }
  512. }
// Run method executes the pipeline.
//
// `commits` is a slice with the git commits to analyse. Multiple branches are supported.
//
// Returns the mapping from each LeafPipelineItem to the corresponding analysis result.
// There is always a "nil" record with CommonAnalysisResult.
// NOTE(review): commits must be non-empty — the final block indexes commits[0].
func (pipeline *Pipeline) Run(commits []*object.Commit) (map[LeafPipelineItem]interface{}, error) {
	startRunTime := time.Now()
	onProgress := pipeline.OnProgress
	if onProgress == nil {
		onProgress = func(int, int) {}
	}
	plan := prepareRunPlan(commits)
	// +2: one extra step for finalization and one for the result assembly below.
	progressSteps := len(plan) + 2
	// branches maps a branch index to its own set of item clones;
	// branch 0 starts with the original items.
	branches := map[int][]PipelineItem{0: pipeline.items}
	for index, step := range plan {
		onProgress(index + 1, progressSteps)
		firstItem := step.Items[0]
		switch step.Action {
		case runActionCommit:
			// state accumulates the entities produced so far for this commit;
			// it always carries the commit itself and its sequence index.
			state := map[string]interface{}{
				DependencyCommit: step.Commit,
				DependencyIndex:  index,
			}
			for _, item := range branches[firstItem] {
				update, err := item.Consume(state)
				if err != nil {
					log.Printf("%s failed on commit #%d %s\n",
						item.Name(), index + 1, step.Commit.Hash.String())
					return nil, err
				}
				// Every promised entity must actually be produced.
				for _, key := range item.Provides() {
					val, ok := update[key]
					if !ok {
						panic(fmt.Sprintf("%s: Consume() did not return %s", item.Name(), key))
					}
					state[key] = val
				}
			}
		case runActionFork:
			// Clone the source branch's items into the new branch slots.
			for i, clone := range cloneItems(branches[firstItem], len(step.Items)-1) {
				branches[step.Items[i+1]] = clone
			}
		case runActionMerge:
			merged := make([][]PipelineItem, len(step.Items))
			for i, b := range step.Items {
				merged[i] = branches[b]
			}
			mergeItems(merged)
		case runActionDelete:
			delete(branches, firstItem)
		}
	}
	onProgress(len(plan) + 1, progressSteps)
	// Collect the leaf results from the surviving branch with the smallest index.
	result := map[LeafPipelineItem]interface{}{}
	for _, item := range getMasterBranch(branches) {
		if casted, ok := item.(LeafPipelineItem); ok {
			result[casted] = casted.Finalize()
		}
	}
	onProgress(progressSteps, progressSteps)
	result[nil] = &CommonAnalysisResult{
		BeginTime:     commits[0].Author.When.Unix(),
		EndTime:       commits[len(commits)-1].Author.When.Unix(),
		CommitsNumber: len(commits),
		RunTime:       time.Since(startRunTime),
	}
	return result, nil
}
  582. // LoadCommitsFromFile reads the file by the specified FS path and generates the sequence of commits
  583. // by interpreting each line as a Git commit hash.
  584. func LoadCommitsFromFile(path string, repository *git.Repository) ([]*object.Commit, error) {
  585. var file io.ReadCloser
  586. if path != "-" {
  587. var err error
  588. file, err = os.Open(path)
  589. if err != nil {
  590. return nil, err
  591. }
  592. defer file.Close()
  593. } else {
  594. file = os.Stdin
  595. }
  596. scanner := bufio.NewScanner(file)
  597. var commits []*object.Commit
  598. for scanner.Scan() {
  599. hash := plumbing.NewHash(scanner.Text())
  600. if len(hash) != 20 {
  601. return nil, errors.New("invalid commit hash " + scanner.Text())
  602. }
  603. commit, err := repository.CommitObject(hash)
  604. if err != nil {
  605. return nil, err
  606. }
  607. commits = append(commits, commit)
  608. }
  609. return commits, nil
  610. }
  611. const (
  612. // runActionCommit corresponds to a regular commit
  613. runActionCommit = 0
  614. // runActionFork splits a branch into several parts
  615. runActionFork = iota
  616. // runActionMerge merges several branches together
  617. runActionMerge = iota
  618. // runActionDelete removes the branch as it is no longer needed
  619. runActionDelete = iota
  620. )
// runAction is a single step of the execution plan produced by prepareRunPlan()
// and interpreted by Pipeline.Run().
type runAction struct {
	// Action is one of runActionCommit, runActionFork, runActionMerge, runActionDelete.
	Action int
	// Commit is the commit to consume; Run() reads it only for runActionCommit steps.
	Commit *object.Commit
	// Items are the branch indexes this action applies to.
	Items []int
}
  626. func cloneItems(origin []PipelineItem, n int) [][]PipelineItem {
  627. clones := make([][]PipelineItem, n)
  628. for j := 0; j < n; j++ {
  629. clones[j] = make([]PipelineItem, len(origin))
  630. }
  631. for i, item := range origin {
  632. itemClones := item.Fork(n)
  633. for j := 0; j < n; j++ {
  634. clones[j][i] = itemClones[j]
  635. }
  636. }
  637. return clones
  638. }
  639. func mergeItems(branches [][]PipelineItem) {
  640. buffer := make([]PipelineItem, len(branches) - 1)
  641. for i, item := range branches[0] {
  642. for j := 0; j < len(branches)-1; j++ {
  643. buffer[j] = branches[j+1][i]
  644. }
  645. item.Merge(buffer)
  646. }
  647. }
  648. // getMasterBranch returns the branch with the smallest index.
  649. func getMasterBranch(branches map[int][]PipelineItem) []PipelineItem {
  650. minKey := 1 << 31
  651. var minVal []PipelineItem
  652. for key, val := range branches {
  653. if key < minKey {
  654. minKey = key
  655. minVal = val
  656. }
  657. }
  658. return minVal
  659. }
// prepareRunPlan schedules the actions for Pipeline.Run(): it builds the commit
// DAG, drops disjoint components, merges linear chains, collapses fast-forwards
// and finally emits the optimized sequence of runAction-s.
func prepareRunPlan(commits []*object.Commit) []runAction {
	hashes, dag := buildDag(commits)
	leaveRootComponent(hashes, dag)
	numParents := bindNumParents(hashes, dag)
	mergedDag, mergedSeq := mergeDag(numParents, hashes, dag)
	orderNodes := bindOrderNodes(mergedDag)
	collapseFastForwards(orderNodes, hashes, mergedDag, dag, mergedSeq)
	// Debugging aid: prints the merged DAG in Graphviz format.
	/*fmt.Printf("digraph Hercules {\n")
	for i, c := range order {
		commit := hashes[c]
		fmt.Printf(" \"%s\"[label=\"[%d] %s\"]\n", commit.Hash.String(), i, commit.Hash.String()[:6])
		for _, child := range mergedDag[commit.Hash] {
			fmt.Printf(" \"%s\" -> \"%s\"\n", commit.Hash.String(), child.Hash.String())
		}
	}
	fmt.Printf("}\n")*/
	plan := generatePlan(orderNodes, numParents, hashes, mergedDag, dag, mergedSeq)
	plan = optimizePlan(plan)
	return plan
}
  681. // buildDag generates the raw commit DAG and the commit hash map.
  682. func buildDag(commits []*object.Commit) (
  683. map[string]*object.Commit, map[plumbing.Hash][]*object.Commit) {
  684. hashes := map[string]*object.Commit{}
  685. for _, commit := range commits {
  686. hashes[commit.Hash.String()] = commit
  687. }
  688. dag := map[plumbing.Hash][]*object.Commit{}
  689. for _, commit := range commits {
  690. if _, exists := dag[commit.Hash]; !exists {
  691. dag[commit.Hash] = make([]*object.Commit, 0, 1)
  692. }
  693. for _, parent := range commit.ParentHashes {
  694. if _, exists := hashes[parent.String()]; !exists {
  695. continue
  696. }
  697. children := dag[parent]
  698. if children == nil {
  699. children = make([]*object.Commit, 0, 1)
  700. }
  701. dag[parent] = append(children, commit)
  702. }
  703. }
  704. return hashes, dag
  705. }
  706. // bindNumParents returns curried "numParents" function.
  707. func bindNumParents(
  708. hashes map[string]*object.Commit,
  709. dag map[plumbing.Hash][]*object.Commit) func(c *object.Commit) int {
  710. return func(c *object.Commit) int {
  711. r := 0
  712. for _, parent := range c.ParentHashes {
  713. if p, exists := hashes[parent.String()]; exists {
  714. for _, pc := range dag[p.Hash] {
  715. if pc.Hash == c.Hash {
  716. r++
  717. break
  718. }
  719. }
  720. }
  721. }
  722. return r
  723. }
  724. }
  725. // leaveRootComponent runs connected components analysis and throws away everything
  726. // but the part which grows from the root.
  727. func leaveRootComponent(
  728. hashes map[string]*object.Commit,
  729. dag map[plumbing.Hash][]*object.Commit) {
  730. visited := map[plumbing.Hash]bool{}
  731. var sets [][]plumbing.Hash
  732. for key := range dag {
  733. if visited[key] {
  734. continue
  735. }
  736. var set []plumbing.Hash
  737. for queue := []plumbing.Hash{key}; len(queue) > 0; {
  738. head := queue[len(queue)-1]
  739. queue = queue[:len(queue)-1]
  740. if visited[head] {
  741. continue
  742. }
  743. set = append(set, head)
  744. visited[head] = true
  745. for _, c := range dag[head] {
  746. if !visited[c.Hash] {
  747. queue = append(queue, c.Hash)
  748. }
  749. }
  750. if commit, exists := hashes[head.String()]; exists {
  751. for _, p := range commit.ParentHashes {
  752. if !visited[p] {
  753. if _, exists := hashes[p.String()]; exists {
  754. queue = append(queue, p)
  755. }
  756. }
  757. }
  758. }
  759. }
  760. sets = append(sets, set)
  761. }
  762. if len(sets) > 1 {
  763. maxlen := 0
  764. maxind := -1
  765. for i, set := range sets {
  766. if len(set) > maxlen {
  767. maxlen = len(set)
  768. maxind = i
  769. }
  770. }
  771. for i, set := range sets {
  772. if i == maxind {
  773. continue
  774. }
  775. for _, h := range set {
  776. log.Printf("warning: dropped %s from the analysis - disjoint", h.String())
  777. delete(dag, h)
  778. delete(hashes, h.String())
  779. }
  780. }
  781. }
  782. }
  783. // bindOrderNodes returns curried "orderNodes" function.
  784. func bindOrderNodes(mergedDag map[plumbing.Hash][]*object.Commit) func(reverse bool) []string {
  785. return func(reverse bool) []string {
  786. graph := toposort.NewGraph()
  787. keys := make([]plumbing.Hash, 0, len(mergedDag))
  788. for key := range mergedDag {
  789. keys = append(keys, key)
  790. }
  791. sort.Slice(keys, func(i, j int) bool { return keys[i].String() < keys[j].String() })
  792. for _, key := range keys {
  793. graph.AddNode(key.String())
  794. }
  795. for _, key := range keys {
  796. children := mergedDag[key]
  797. sort.Slice(children, func(i, j int) bool {
  798. return children[i].Hash.String() < children[j].Hash.String()
  799. })
  800. for _, c := range children {
  801. graph.AddEdge(key.String(), c.Hash.String())
  802. }
  803. }
  804. order, ok := graph.Toposort()
  805. if !ok {
  806. // should never happen
  807. panic("Could not topologically sort the DAG of commits")
  808. }
  809. if reverse {
  810. // one day this must appear in the standard library...
  811. for i, j := 0, len(order)-1; i < len(order)/2; i, j = i+1, j-1 {
  812. order[i], order[j] = order[j], order[i]
  813. }
  814. }
  815. return order
  816. }
  817. }
  818. // mergeDag turns sequences of consecutive commits into single nodes.
  819. func mergeDag(
  820. numParents func(c *object.Commit) int,
  821. hashes map[string]*object.Commit,
  822. dag map[plumbing.Hash][]*object.Commit) (
  823. mergedDag, mergedSeq map[plumbing.Hash][]*object.Commit) {
  824. parentOf := func(c *object.Commit) plumbing.Hash {
  825. var parent plumbing.Hash
  826. for _, p := range c.ParentHashes {
  827. if _, exists := hashes[p.String()]; exists {
  828. if parent != plumbing.ZeroHash {
  829. // more than one parent
  830. return plumbing.ZeroHash
  831. }
  832. parent = p
  833. }
  834. }
  835. return parent
  836. }
  837. mergedDag = map[plumbing.Hash][]*object.Commit{}
  838. mergedSeq = map[plumbing.Hash][]*object.Commit{}
  839. visited := map[plumbing.Hash]bool{}
  840. for ch := range dag {
  841. c := hashes[ch.String()]
  842. if visited[c.Hash] {
  843. continue
  844. }
  845. for true {
  846. parent := parentOf(c)
  847. if parent == plumbing.ZeroHash || len(dag[parent]) != 1 {
  848. break
  849. }
  850. c = hashes[parent.String()]
  851. }
  852. head := c
  853. var seq []*object.Commit
  854. children := dag[c.Hash]
  855. for true {
  856. visited[c.Hash] = true
  857. seq = append(seq, c)
  858. if len(children) != 1 {
  859. break
  860. }
  861. c = children[0]
  862. children = dag[c.Hash]
  863. if numParents(c) != 1 {
  864. break
  865. }
  866. }
  867. mergedSeq[head.Hash] = seq
  868. mergedDag[head.Hash] = dag[seq[len(seq)-1].Hash]
  869. }
  870. return
  871. }
// collapseFastForwards removes the fast forward merges.
//
// Nodes are visited in reverse topological order (children before parents).
// When a merged node has exactly two children and one child chain leads
// straight into the other, the redundant edge is collapsed: the intermediate
// chain is appended to this node's sequence and its own entries are removed
// from mergedDag/mergedSeq; `dag` is trimmed to the single remaining edge.
func collapseFastForwards(
	orderNodes func(reverse bool) []string,
	hashes map[string]*object.Commit,
	mergedDag, dag, mergedSeq map[plumbing.Hash][]*object.Commit) {
	for _, strkey := range orderNodes(true) {
		key := hashes[strkey].Hash
		vals, exists := mergedDag[key]
		if !exists {
			continue
		}
		if len(vals) == 2 {
			grand1 := mergedDag[vals[0].Hash]
			grand2 := mergedDag[vals[1].Hash]
			// vals[1]'s chain fast-forwards into vals[0]
			if len(grand2) == 1 && vals[0].Hash == grand2[0].Hash {
				mergedDag[key] = mergedDag[vals[0].Hash]
				dag[key] = vals[1:]
				delete(mergedDag, vals[0].Hash)
				delete(mergedDag, vals[1].Hash)
				mergedSeq[key] = append(mergedSeq[key], mergedSeq[vals[1].Hash]...)
				mergedSeq[key] = append(mergedSeq[key], mergedSeq[vals[0].Hash]...)
				delete(mergedSeq, vals[0].Hash)
				delete(mergedSeq, vals[1].Hash)
			}
			// symmetric: vals[0]'s chain fast-forwards into vals[1].
			// NOTE(review): grand1 was captured before the branch above could
			// mutate mergedDag. Both conditions holding at once would require
			// vals[0] and vals[1] to point at each other, which a DAG forbids,
			// so this looks safe — but an `else if` would make that explicit;
			// confirm before changing.
			if len(grand1) == 1 && vals[1].Hash == grand1[0].Hash {
				mergedDag[key] = mergedDag[vals[1].Hash]
				dag[key] = vals[:1]
				delete(mergedDag, vals[0].Hash)
				delete(mergedDag, vals[1].Hash)
				mergedSeq[key] = append(mergedSeq[key], mergedSeq[vals[0].Hash]...)
				mergedSeq[key] = append(mergedSeq[key], mergedSeq[vals[1].Hash]...)
				delete(mergedSeq, vals[0].Hash)
				delete(mergedSeq, vals[1].Hash)
			}
		}
	}
}
// generatePlan creates the list of actions from the commit DAG.
//
// It walks the merged nodes in topological order, replaying each collapsed
// chain of commits on a branch, forking new branches where a node has
// several children, and emitting merge actions where a commit has several
// in-DAG parents.
func generatePlan(
	orderNodes func(reverse bool) []string,
	numParents func(c *object.Commit) int,
	hashes map[string]*object.Commit,
	mergedDag, dag, mergedSeq map[plumbing.Hash][]*object.Commit) []runAction {
	var plan []runAction
	// branches maps a commit hash to the index of the branch it continues
	branches := map[plumbing.Hash]int{}
	// counter is the next free branch index; 0 belongs to the root
	counter := 1
	for seqIndex, name := range orderNodes(false) {
		commit := hashes[name]
		if seqIndex == 0 {
			// the first node in topological order starts branch 0
			branches[commit.Hash] = 0
		}
		var branch int
		{
			var exists bool
			branch, exists = branches[commit.Hash]
			if !exists {
				// not assigned yet; appendMergeIfNeeded resolves it
				branch = -1
			}
		}
		branchExists := func() bool { return branch >= 0 }
		// appendCommit schedules c to be consumed on the given branch.
		appendCommit := func(c *object.Commit, branch int) {
			plan = append(plan, runAction{
				Action: runActionCommit,
				Commit: c,
				Items:  []int{branch},
			})
		}
		// appendMergeIfNeeded emits the merge commit on every parent branch
		// and then a runActionMerge, when the commit has 2+ in-DAG parents.
		// It also moves `branch` to the smallest eligible parent branch.
		appendMergeIfNeeded := func() {
			if numParents(commit) < 2 {
				return
			}
			// merge after the merge commit (the first in the sequence)
			var items []int
			minBranch := 1 << 31 // effectively +inf for branch indexes
			for _, parent := range commit.ParentHashes {
				if _, exists := hashes[parent.String()]; exists {
					parentBranch := branches[parent]
					if len(dag[parent]) == 1 && minBranch > parentBranch {
						minBranch = parentBranch
					}
					items = append(items, parentBranch)
					if parentBranch != branch {
						appendCommit(commit, parentBranch)
					}
				}
			}
			if minBranch < 1<<31 {
				// the merge result continues on the smallest parent branch
				branch = minBranch
				branches[commit.Hash] = minBranch
			} else if !branchExists() {
				panic("!branchExists()")
			}
			plan = append(plan, runAction{
				Action: runActionMerge,
				Commit: nil,
				Items:  items,
			})
		}
		if subseq, exists := mergedSeq[commit.Hash]; exists {
			// replay the whole collapsed chain on the current branch
			for subseqIndex, offspring := range subseq {
				if branchExists() {
					appendCommit(offspring, branch)
				}
				if subseqIndex == 0 {
					appendMergeIfNeeded()
				}
			}
			branches[subseq[len(subseq)-1].Hash] = branch
		}
		if len(mergedDag[commit.Hash]) > 1 {
			// the chain forks: the first child inherits the current branch,
			// the rest get freshly allocated branch indexes
			branches[mergedDag[commit.Hash][0].Hash] = branch
			children := []int{branch}
			for i, child := range mergedDag[commit.Hash] {
				if i > 0 {
					branches[child.Hash] = counter
					children = append(children, counter)
					counter++
				}
			}
			plan = append(plan, runAction{
				Action: runActionFork,
				Commit: nil,
				Items:  children,
			})
		}
	}
	return plan
}
// optimizePlan removes "dead" nodes and inserts `runActionDelete` disposal steps.
//
// | *
// * /
// |\/
// |/
// *
//
// A branch which carries exactly one commit is considered redundant: its
// fork/merge bookkeeping is filtered out of the plan. Additionally, a
// runActionDelete is inserted right after the last plan index at which each
// surviving branch is referenced, so the consumer can dispose of its state.
func optimizePlan(plan []runAction) []runAction {
	// lives maps branch index to the number of commits in that branch
	lives := map[int]int{}
	// lastMentioned maps branch index to the index inside `plan` when that branch was last used
	lastMentioned := map[int]int{}
	for i, p := range plan {
		firstItem := p.Items[0]
		switch p.Action {
		case runActionCommit:
			lives[firstItem]++
			lastMentioned[firstItem] = i
		case runActionFork:
			// only the originating branch (Items[0]) is updated here; the
			// forked-off branches are mentioned by their own later actions
			lastMentioned[firstItem] = i
		case runActionMerge:
			for _, item := range p.Items {
				lastMentioned[item] = i
			}
		}
	}
	// branches with a single commit are dead weight
	branchesToDelete := map[int]bool{}
	for key, life := range lives {
		if life == 1 {
			branchesToDelete[key] = true
			delete(lastMentioned, key)
		}
	}
	var optimizedPlan []runAction
	// (last mention index, branch) pairs; a branch last mentioned by the very
	// last action needs no explicit delete and is excluded
	lastMentionedArr := make([][2]int, 0, len(lastMentioned)+1)
	for key, val := range lastMentioned {
		if val != len(plan)-1 {
			lastMentionedArr = append(lastMentionedArr, [2]int{val, key})
		}
	}
	if len(lastMentionedArr) == 0 && len(branchesToDelete) == 0 {
		// early return - we have nothing to optimize
		return plan
	}
	sort.Slice(lastMentionedArr, func(i, j int) bool {
		return lastMentionedArr[i][0] < lastMentionedArr[j][0]
	})
	// sentinel which flushes the tail of the plan; branch -1 suppresses the
	// delete emission below
	lastMentionedArr = append(lastMentionedArr, [2]int{len(plan) - 1, -1})
	prevpi := -1
	for _, pair := range lastMentionedArr {
		// copy the plan entries up to and including the mention point,
		// filtering out actions which involve only deleted branches
		for pi := prevpi + 1; pi <= pair[0]; pi++ {
			p := plan[pi]
			switch p.Action {
			case runActionCommit:
				if !branchesToDelete[p.Items[0]] {
					optimizedPlan = append(optimizedPlan, p)
				}
			case runActionFork:
				var newBranches []int
				for _, b := range p.Items {
					if !branchesToDelete[b] {
						newBranches = append(newBranches, b)
					}
				}
				// a fork with fewer than two remaining branches is a no-op
				if len(newBranches) > 1 {
					optimizedPlan = append(optimizedPlan, runAction{
						Action: runActionFork,
						Commit: p.Commit,
						Items:  newBranches,
					})
				}
			case runActionMerge:
				var newBranches []int
				for _, b := range p.Items {
					if !branchesToDelete[b] {
						newBranches = append(newBranches, b)
					}
				}
				// a merge of fewer than two remaining branches is a no-op
				if len(newBranches) > 1 {
					optimizedPlan = append(optimizedPlan, runAction{
						Action: runActionMerge,
						Commit: p.Commit,
						Items:  newBranches,
					})
				}
			}
		}
		if pair[1] >= 0 {
			prevpi = pair[0]
			optimizedPlan = append(optimizedPlan, runAction{
				Action: runActionDelete,
				Commit: nil,
				Items:  []int{pair[1]},
			})
		}
	}
	// single commit can be detected as redundant
	if len(optimizedPlan) > 0 {
		return optimizedPlan
	}
	return plan
	// TODO(vmarkovtsev): there can be also duplicate redundant merges, e.g.
	/*
		0 4e34f03d829fbacb71cde0e010de87ea945dc69a [3]
		0 4e34f03d829fbacb71cde0e010de87ea945dc69a [12]
		2 [3 12]
		0 06716c2b39422938b77ddafa4d5c39bb9e4476da [3]
		0 06716c2b39422938b77ddafa4d5c39bb9e4476da [12]
		2 [3 12]
		0 1219c7bf9e0e1a93459a052ab8b351bfc379dc19 [12]
	*/
}