pipeline.go 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098
  1. package core
  2. import (
  3. "bufio"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "io/ioutil"
  8. "log"
  9. "os"
  10. "path/filepath"
  11. "sort"
  12. "strings"
  13. "time"
  14. "gopkg.in/src-d/go-git.v4"
  15. "gopkg.in/src-d/go-git.v4/plumbing"
  16. "gopkg.in/src-d/go-git.v4/plumbing/object"
  17. "gopkg.in/src-d/hercules.v4/internal/pb"
  18. "gopkg.in/src-d/hercules.v4/internal/toposort"
  19. )
  20. // ConfigurationOptionType represents the possible types of a ConfigurationOption's value.
  21. type ConfigurationOptionType int
  22. const (
  23. // BoolConfigurationOption reflects the boolean value type.
  24. BoolConfigurationOption ConfigurationOptionType = iota
  25. // IntConfigurationOption reflects the integer value type.
  26. IntConfigurationOption
  27. // StringConfigurationOption reflects the string value type.
  28. StringConfigurationOption
  29. // FloatConfigurationOption reflects a floating point value type.
  30. FloatConfigurationOption
  31. // StringsConfigurationOption reflects the array of strings value type.
  32. StringsConfigurationOption
  33. )
  34. // String() returns an empty string for the boolean type, "int" for integers and "string" for
  35. // strings. It is used in the command line interface to show the argument's type.
  36. func (opt ConfigurationOptionType) String() string {
  37. switch opt {
  38. case BoolConfigurationOption:
  39. return ""
  40. case IntConfigurationOption:
  41. return "int"
  42. case StringConfigurationOption:
  43. return "string"
  44. case FloatConfigurationOption:
  45. return "float"
  46. case StringsConfigurationOption:
  47. return "string"
  48. }
  49. panic(fmt.Sprintf("Invalid ConfigurationOptionType value %d", opt))
  50. }
  51. // ConfigurationOption allows for the unified, retrospective way to setup PipelineItem-s.
  52. type ConfigurationOption struct {
  53. // Name identifies the configuration option in facts.
  54. Name string
  55. // Description represents the help text about the configuration option.
  56. Description string
  57. // Flag corresponds to the CLI token with "--" prepended.
  58. Flag string
  59. // Type specifies the kind of the configuration option's value.
  60. Type ConfigurationOptionType
  61. // Default is the initial value of the configuration option.
  62. Default interface{}
  63. }
  64. // FormatDefault converts the default value of ConfigurationOption to string.
  65. // Used in the command line interface to show the argument's default value.
  66. func (opt ConfigurationOption) FormatDefault() string {
  67. if opt.Type == StringsConfigurationOption {
  68. return fmt.Sprintf("\"%s\"", strings.Join(opt.Default.([]string), ","))
  69. }
  70. if opt.Type != StringConfigurationOption {
  71. return fmt.Sprint(opt.Default)
  72. }
  73. return fmt.Sprintf("\"%s\"", opt.Default)
  74. }
// PipelineItem is the interface for all the units in the Git commits analysis pipeline.
type PipelineItem interface {
	// Name returns the name of the analysis.
	Name() string
	// Provides returns the list of keys of reusable calculated entities.
	// Other items may depend on them.
	Provides() []string
	// Requires returns the list of keys of needed entities which must be supplied in Consume().
	Requires() []string
	// ListConfigurationOptions returns the list of available options which can be consumed by Configure().
	ListConfigurationOptions() []ConfigurationOption
	// Configure performs the initial setup of the object by applying parameters from facts.
	// It allows to create PipelineItems in a universal way.
	Configure(facts map[string]interface{})
	// Initialize prepares and resets the item. Consume() requires Initialize()
	// to be called at least once beforehand.
	Initialize(*git.Repository)
	// Consume processes the next commit.
	// deps contains the required entities which match Requires(). Besides, it always includes
	// "commit" (the commit object) and "index" (the position in the sequence).
	// Returns the calculated entities which match Provides().
	Consume(deps map[string]interface{}) (map[string]interface{}, error)
}
// FeaturedPipelineItem enables switching the automatic insertion of pipeline items on or off.
// Items implementing it are only deployed when all of their features are enabled
// in the owning Pipeline (see Pipeline.SetFeature()).
type FeaturedPipelineItem interface {
	PipelineItem
	// Features returns the list of names which enable this item to be automatically inserted
	// in Pipeline.DeployItem().
	Features() []string
}
// LeafPipelineItem corresponds to the top level pipeline items which produce the end results.
type LeafPipelineItem interface {
	PipelineItem
	// Flag returns the cmdline name of the item.
	Flag() string
	// Finalize returns the result of the analysis.
	Finalize() interface{}
	// Serialize encodes the object returned by Finalize() to YAML or Protocol Buffers.
	// binary selects Protocol Buffers; the output is written to writer.
	Serialize(result interface{}, binary bool, writer io.Writer) error
}
// MergeablePipelineItem specifies the methods to combine several analysis results together.
type MergeablePipelineItem interface {
	LeafPipelineItem
	// Deserialize loads the result from a Protocol Buffers blob.
	Deserialize(pbmessage []byte) (interface{}, error)
	// MergeResults joins two results together. Common-s are specified as the global state.
	MergeResults(r1, r2 interface{}, c1, c2 *CommonAnalysisResult) interface{}
}
  123. // CommonAnalysisResult holds the information which is always extracted at Pipeline.Run().
  124. type CommonAnalysisResult struct {
  125. // Time of the first commit in the analysed sequence.
  126. BeginTime int64
  127. // Time of the last commit in the analysed sequence.
  128. EndTime int64
  129. // The number of commits in the analysed sequence.
  130. CommitsNumber int
  131. // The duration of Pipeline.Run().
  132. RunTime time.Duration
  133. }
  134. // BeginTimeAsTime converts the UNIX timestamp of the beginning to Go time.
  135. func (car *CommonAnalysisResult) BeginTimeAsTime() time.Time {
  136. return time.Unix(car.BeginTime, 0)
  137. }
  138. // EndTimeAsTime converts the UNIX timestamp of the ending to Go time.
  139. func (car *CommonAnalysisResult) EndTimeAsTime() time.Time {
  140. return time.Unix(car.EndTime, 0)
  141. }
  142. // Merge combines the CommonAnalysisResult with an other one.
  143. // We choose the earlier BeginTime, the later EndTime, sum the number of commits and the
  144. // elapsed run times.
  145. func (car *CommonAnalysisResult) Merge(other *CommonAnalysisResult) {
  146. if car.EndTime == 0 || other.BeginTime == 0 {
  147. panic("Merging with an uninitialized CommonAnalysisResult")
  148. }
  149. if other.BeginTime < car.BeginTime {
  150. car.BeginTime = other.BeginTime
  151. }
  152. if other.EndTime > car.EndTime {
  153. car.EndTime = other.EndTime
  154. }
  155. car.CommitsNumber += other.CommitsNumber
  156. car.RunTime += other.RunTime
  157. }
  158. // FillMetadata copies the data to a Protobuf message.
  159. func (car *CommonAnalysisResult) FillMetadata(meta *pb.Metadata) *pb.Metadata {
  160. meta.BeginUnixTime = car.BeginTime
  161. meta.EndUnixTime = car.EndTime
  162. meta.Commits = int32(car.CommitsNumber)
  163. meta.RunTime = car.RunTime.Nanoseconds() / 1e6
  164. return meta
  165. }
  166. // Metadata is defined in internal/pb/pb.pb.go - header of the binary file.
  167. type Metadata = pb.Metadata
  168. // MetadataToCommonAnalysisResult copies the data from a Protobuf message.
  169. func MetadataToCommonAnalysisResult(meta *Metadata) *CommonAnalysisResult {
  170. return &CommonAnalysisResult{
  171. BeginTime: meta.BeginUnixTime,
  172. EndTime: meta.EndUnixTime,
  173. CommitsNumber: int(meta.Commits),
  174. RunTime: time.Duration(meta.RunTime * 1e6),
  175. }
  176. }
// Pipeline is the core Hercules entity which carries several PipelineItems and executes them.
// See the extended example of how a Pipeline works in doc.go
type Pipeline struct {
	// OnProgress is the callback which is invoked in Analyse() to output it's
	// progress. The first argument is the number of processed commits and the
	// second is the total number of commits.
	OnProgress func(int, int)
	// repository points to the analysed Git repository struct from go-git.
	repository *git.Repository
	// items are the registered building blocks in the pipeline. The order defines the
	// execution sequence (resolve() reorders them topologically).
	items []PipelineItem
	// facts is the collection of parameters to create items; passed to Configure().
	facts map[string]interface{}
	// features holds the feature flags which enable the corresponding optional items.
	features map[string]bool
}
const (
	// ConfigPipelineDumpPath is the name of the Pipeline configuration option (Pipeline.Initialize())
	// which enables saving the items DAG to the specified file.
	ConfigPipelineDumpPath = "Pipeline.DumpPath"
	// ConfigPipelineDryRun is the name of the Pipeline configuration option (Pipeline.Initialize())
	// which disables Configure() and Initialize() invocation on each PipelineItem during the
	// Pipeline initialization.
	// Subsequent Run() calls are going to fail. Useful together with ConfigPipelineDumpPath.
	ConfigPipelineDryRun = "Pipeline.DryRun"
	// ConfigPipelineCommits is the name of the Pipeline configuration option (Pipeline.Initialize())
	// which allows to specify the custom commit sequence. By default, Pipeline.Commits() is used.
	ConfigPipelineCommits = "commits"
)
  207. // NewPipeline initializes a new instance of Pipeline struct.
  208. func NewPipeline(repository *git.Repository) *Pipeline {
  209. return &Pipeline{
  210. repository: repository,
  211. items: []PipelineItem{},
  212. facts: map[string]interface{}{},
  213. features: map[string]bool{},
  214. }
  215. }
// GetFact returns the value of the fact with the specified name.
// A missing fact yields nil (the zero value of interface{}).
func (pipeline *Pipeline) GetFact(name string) interface{} {
	return pipeline.facts[name]
}
// SetFact sets the value of the fact with the specified name.
// Facts are later passed to every item's Configure() in Initialize().
func (pipeline *Pipeline) SetFact(name string, value interface{}) {
	pipeline.facts[name] = value
}
  224. // GetFeature returns the state of the feature with the specified name (enabled/disabled) and
  225. // whether it exists. See also: FeaturedPipelineItem.
  226. func (pipeline *Pipeline) GetFeature(name string) (bool, bool) {
  227. val, exists := pipeline.features[name]
  228. return val, exists
  229. }
// SetFeature sets the value of the feature with the specified name.
// Features can only be switched on through this method; there is no way to clear one.
// See also: FeaturedPipelineItem.
func (pipeline *Pipeline) SetFeature(name string) {
	pipeline.features[name] = true
}
  235. // SetFeaturesFromFlags enables the features which were specified through the command line flags
  236. // which belong to the given PipelineItemRegistry instance.
  237. // See also: AddItem().
  238. func (pipeline *Pipeline) SetFeaturesFromFlags(registry ...*PipelineItemRegistry) {
  239. var ffr *PipelineItemRegistry
  240. if len(registry) == 0 {
  241. ffr = Registry
  242. } else if len(registry) == 1 {
  243. ffr = registry[0]
  244. } else {
  245. panic("Zero or one registry is allowed to be passed.")
  246. }
  247. for _, feature := range ffr.featureFlags.Flags {
  248. pipeline.SetFeature(feature)
  249. }
  250. }
// DeployItem inserts a PipelineItem into the pipeline. It also recursively creates all of it's
// dependencies (PipelineItem.Requires()). Returns the same item as specified in the arguments.
func (pipeline *Pipeline) DeployItem(item PipelineItem) PipelineItem {
	// Enable the features the item itself declares so that its own optional
	// dependencies are not filtered out in the loop below.
	fpi, ok := item.(FeaturedPipelineItem)
	if ok {
		for _, f := range fpi.Features() {
			pipeline.SetFeature(f)
		}
	}
	// Breadth-first traversal over the dependency closure; "added" tracks the
	// items (by name) which are already present to avoid duplicates.
	queue := []PipelineItem{}
	queue = append(queue, item)
	added := map[string]PipelineItem{}
	for _, item := range pipeline.items {
		added[item.Name()] = item
	}
	added[item.Name()] = item
	pipeline.AddItem(item)
	for len(queue) > 0 {
		head := queue[0]
		queue = queue[1:]
		for _, dep := range head.Requires() {
			// Registry.Summon() returns the registered items able to provide "dep".
			for _, sibling := range Registry.Summon(dep) {
				if _, exists := added[sibling.Name()]; !exists {
					disabled := false
					// If this item supports features, check them against the activated in pipeline.features
					if fpi, matches := sibling.(FeaturedPipelineItem); matches {
						for _, feature := range fpi.Features() {
							if !pipeline.features[feature] {
								disabled = true
								break
							}
						}
					}
					if disabled {
						continue
					}
					added[sibling.Name()] = sibling
					queue = append(queue, sibling)
					pipeline.AddItem(sibling)
				}
			}
		}
	}
	return item
}
// AddItem inserts a PipelineItem into the pipeline. It does not check any dependencies.
// The execution order is fixed later by resolve() during Initialize().
// See also: DeployItem().
func (pipeline *Pipeline) AddItem(item PipelineItem) PipelineItem {
	pipeline.items = append(pipeline.items, item)
	return item
}
  302. // RemoveItem deletes a PipelineItem from the pipeline. It leaves all the rest of the items intact.
  303. func (pipeline *Pipeline) RemoveItem(item PipelineItem) {
  304. for i, reg := range pipeline.items {
  305. if reg == item {
  306. pipeline.items = append(pipeline.items[:i], pipeline.items[i+1:]...)
  307. return
  308. }
  309. }
  310. }
// Len returns the number of items in the pipeline.
// See also: AddItem(), RemoveItem().
func (pipeline *Pipeline) Len() int {
	return len(pipeline.items)
}
// Commits returns the critical path in the repository's history. It starts
// from HEAD and traces commits backwards till the root. When it encounters
// a merge (more than one parent), it always chooses the first parent.
// Any repository access error aborts the process with a panic.
func (pipeline *Pipeline) Commits() []*object.Commit {
	result := []*object.Commit{}
	repository := pipeline.repository
	head, err := repository.Head()
	if err != nil {
		// A repository without a resolvable HEAD cannot be analysed at all.
		panic(err)
	}
	commit, err := repository.CommitObject(head.Hash())
	if err != nil {
		panic(err)
	}
	// the first parent matches the head
	// Parents().Next() yields the first parent; io.EOF marks the root commit
	// and terminates the walk.
	for ; err != io.EOF; commit, err = commit.Parents().Next() {
		if err != nil {
			panic(err)
		}
		result = append(result, commit)
	}
	// reverse the order
	// The walk collected commits newest-first; flip to chronological order.
	for i, j := 0, len(result)-1; i < j; i, j = i+1, j-1 {
		result[i], result[j] = result[j], result[i]
	}
	return result
}
// sortablePipelineItems implements sort.Interface to order pipeline items
// alphabetically by Name(); resolve() uses it for deterministic output.
type sortablePipelineItems []PipelineItem

// Len returns the number of items.
func (items sortablePipelineItems) Len() int {
	return len(items)
}

// Less orders two items lexicographically by their names.
func (items sortablePipelineItems) Less(i, j int) bool {
	return items[i].Name() < items[j].Name()
}

// Swap exchanges two items in place.
func (items sortablePipelineItems) Swap(i, j int) {
	items[i], items[j] = items[j], items[i]
}
  352. func (pipeline *Pipeline) resolve(dumpPath string) {
  353. graph := toposort.NewGraph()
  354. sort.Sort(sortablePipelineItems(pipeline.items))
  355. name2item := map[string]PipelineItem{}
  356. ambiguousMap := map[string][]string{}
  357. nameUsages := map[string]int{}
  358. for _, item := range pipeline.items {
  359. nameUsages[item.Name()]++
  360. }
  361. counters := map[string]int{}
  362. for _, item := range pipeline.items {
  363. name := item.Name()
  364. if nameUsages[name] > 1 {
  365. index := counters[item.Name()] + 1
  366. counters[item.Name()] = index
  367. name = fmt.Sprintf("%s_%d", item.Name(), index)
  368. }
  369. graph.AddNode(name)
  370. name2item[name] = item
  371. for _, key := range item.Provides() {
  372. key = "[" + key + "]"
  373. graph.AddNode(key)
  374. if graph.AddEdge(name, key) > 1 {
  375. if ambiguousMap[key] != nil {
  376. fmt.Fprintln(os.Stderr, "Pipeline:")
  377. for _, item2 := range pipeline.items {
  378. if item2 == item {
  379. fmt.Fprint(os.Stderr, "> ")
  380. }
  381. fmt.Fprint(os.Stderr, item2.Name(), " [")
  382. for i, key2 := range item2.Provides() {
  383. fmt.Fprint(os.Stderr, key2)
  384. if i < len(item.Provides())-1 {
  385. fmt.Fprint(os.Stderr, ", ")
  386. }
  387. }
  388. fmt.Fprintln(os.Stderr, "]")
  389. }
  390. panic("Failed to resolve pipeline dependencies: ambiguous graph.")
  391. }
  392. ambiguousMap[key] = graph.FindParents(key)
  393. }
  394. }
  395. }
  396. counters = map[string]int{}
  397. for _, item := range pipeline.items {
  398. name := item.Name()
  399. if nameUsages[name] > 1 {
  400. index := counters[item.Name()] + 1
  401. counters[item.Name()] = index
  402. name = fmt.Sprintf("%s_%d", item.Name(), index)
  403. }
  404. for _, key := range item.Requires() {
  405. key = "[" + key + "]"
  406. if graph.AddEdge(key, name) == 0 {
  407. panic(fmt.Sprintf("Unsatisfied dependency: %s -> %s", key, item.Name()))
  408. }
  409. }
  410. }
  411. // Try to break the cycles in some known scenarios.
  412. if len(ambiguousMap) > 0 {
  413. ambiguous := []string{}
  414. for key := range ambiguousMap {
  415. ambiguous = append(ambiguous, key)
  416. }
  417. sort.Strings(ambiguous)
  418. bfsorder := graph.BreadthSort()
  419. bfsindex := map[string]int{}
  420. for i, s := range bfsorder {
  421. bfsindex[s] = i
  422. }
  423. for len(ambiguous) > 0 {
  424. key := ambiguous[0]
  425. ambiguous = ambiguous[1:]
  426. pair := ambiguousMap[key]
  427. inheritor := pair[1]
  428. if bfsindex[pair[1]] < bfsindex[pair[0]] {
  429. inheritor = pair[0]
  430. }
  431. removed := graph.RemoveEdge(key, inheritor)
  432. cycle := map[string]bool{}
  433. for _, node := range graph.FindCycle(key) {
  434. cycle[node] = true
  435. }
  436. if len(cycle) == 0 {
  437. cycle[inheritor] = true
  438. }
  439. if removed {
  440. graph.AddEdge(key, inheritor)
  441. }
  442. graph.RemoveEdge(inheritor, key)
  443. graph.ReindexNode(inheritor)
  444. // for all nodes key links to except those in cycle, put the link from inheritor
  445. for _, node := range graph.FindChildren(key) {
  446. if _, exists := cycle[node]; !exists {
  447. graph.AddEdge(inheritor, node)
  448. graph.RemoveEdge(key, node)
  449. }
  450. }
  451. graph.ReindexNode(key)
  452. }
  453. }
  454. var graphCopy *toposort.Graph
  455. if dumpPath != "" {
  456. graphCopy = graph.Copy()
  457. }
  458. strplan, ok := graph.Toposort()
  459. if !ok {
  460. panic("Failed to resolve pipeline dependencies: unable to topologically sort the items.")
  461. }
  462. pipeline.items = make([]PipelineItem, 0, len(pipeline.items))
  463. for _, key := range strplan {
  464. if item, ok := name2item[key]; ok {
  465. pipeline.items = append(pipeline.items, item)
  466. }
  467. }
  468. if dumpPath != "" {
  469. // If there is a floating difference, uncomment this:
  470. // fmt.Fprint(os.Stderr, graphCopy.DebugDump())
  471. ioutil.WriteFile(dumpPath, []byte(graphCopy.Serialize(strplan)), 0666)
  472. absPath, _ := filepath.Abs(dumpPath)
  473. log.Printf("Wrote the DAG to %s\n", absPath)
  474. }
  475. }
  476. // Initialize prepares the pipeline for the execution (Run()). This function
  477. // resolves the execution DAG, Configure()-s and Initialize()-s the items in it in the
  478. // topological dependency order. `facts` are passed inside Configure(). They are mutable.
  479. func (pipeline *Pipeline) Initialize(facts map[string]interface{}) {
  480. if facts == nil {
  481. facts = map[string]interface{}{}
  482. }
  483. if _, exists := facts[ConfigPipelineCommits]; !exists {
  484. facts[ConfigPipelineCommits] = pipeline.Commits()
  485. }
  486. dumpPath, _ := facts[ConfigPipelineDumpPath].(string)
  487. pipeline.resolve(dumpPath)
  488. if dryRun, _ := facts[ConfigPipelineDryRun].(bool); dryRun {
  489. return
  490. }
  491. for _, item := range pipeline.items {
  492. item.Configure(facts)
  493. }
  494. for _, item := range pipeline.items {
  495. item.Initialize(pipeline.repository)
  496. }
  497. }
  498. // Run method executes the pipeline.
  499. //
  500. // `commits` is a slice with the git commits to analyse. Multiple branches are supported.
  501. //
  502. // Returns the mapping from each LeafPipelineItem to the corresponding analysis result.
  503. // There is always a "nil" record with CommonAnalysisResult.
  504. func (pipeline *Pipeline) Run(commits []*object.Commit) (map[LeafPipelineItem]interface{}, error) {
  505. startRunTime := time.Now()
  506. onProgress := pipeline.OnProgress
  507. if onProgress == nil {
  508. onProgress = func(int, int) {}
  509. }
  510. for index, commit := range commits {
  511. onProgress(index, len(commits))
  512. state := map[string]interface{}{"commit": commit, "index": index}
  513. for _, item := range pipeline.items {
  514. update, err := item.Consume(state)
  515. if err != nil {
  516. log.Printf("%s failed on commit #%d %s\n",
  517. item.Name(), index, commit.Hash.String())
  518. return nil, err
  519. }
  520. for _, key := range item.Provides() {
  521. val, ok := update[key]
  522. if !ok {
  523. panic(fmt.Sprintf("%s: Consume() did not return %s", item.Name(), key))
  524. }
  525. state[key] = val
  526. }
  527. }
  528. }
  529. onProgress(len(commits), len(commits))
  530. result := map[LeafPipelineItem]interface{}{}
  531. for _, item := range pipeline.items {
  532. if casted, ok := item.(LeafPipelineItem); ok {
  533. result[casted] = casted.Finalize()
  534. }
  535. }
  536. result[nil] = &CommonAnalysisResult{
  537. BeginTime: commits[0].Author.When.Unix(),
  538. EndTime: commits[len(commits)-1].Author.When.Unix(),
  539. CommitsNumber: len(commits),
  540. RunTime: time.Since(startRunTime),
  541. }
  542. return result, nil
  543. }
  544. // LoadCommitsFromFile reads the file by the specified FS path and generates the sequence of commits
  545. // by interpreting each line as a Git commit hash.
  546. func LoadCommitsFromFile(path string, repository *git.Repository) ([]*object.Commit, error) {
  547. var file io.ReadCloser
  548. if path != "-" {
  549. var err error
  550. file, err = os.Open(path)
  551. if err != nil {
  552. return nil, err
  553. }
  554. defer file.Close()
  555. } else {
  556. file = os.Stdin
  557. }
  558. scanner := bufio.NewScanner(file)
  559. commits := []*object.Commit{}
  560. for scanner.Scan() {
  561. hash := plumbing.NewHash(scanner.Text())
  562. if len(hash) != 20 {
  563. return nil, errors.New("invalid commit hash " + scanner.Text())
  564. }
  565. commit, err := repository.CommitObject(hash)
  566. if err != nil {
  567. return nil, err
  568. }
  569. commits = append(commits, commit)
  570. }
  571. return commits, nil
  572. }
const (
	// runActionCommit (0) denotes a plain commit to be consumed by the items.
	runActionCommit = 0
	// runActionFork (1) splits the analysis state into several branches.
	runActionFork = iota
	// runActionMerge (2) joins several branches together.
	runActionMerge = iota
	// runActionDelete (3) drops a branch which is no longer needed.
	runActionDelete = iota
)

// runAction is a single step in the execution plan built by prepareRunPlan().
type runAction struct {
	// Action is one of the runAction* constants above.
	Action int
	// Commit is the commit associated with the action.
	Commit *object.Commit
	// Items are integer identifiers affected by the action - presumably branch
	// indexes assigned by generatePlan(); confirm against that function.
	Items []int
}
// prepareRunPlan transforms a list of commits into the execution plan: a
// sequence of commit/fork/merge/delete actions over analysis branches.
// The steps: build the raw DAG, drop disjoint components, collapse linear
// chains, order the nodes deterministically, remove fast-forward merges,
// then generate and optimize the plan.
func prepareRunPlan(commits []*object.Commit) []runAction {
	hashes, dag := buildDag(commits)
	leaveRootComponent(hashes, dag)
	numParents := bindNumParents(hashes, dag)
	mergedDag, mergedSeq := mergeDag(numParents, hashes, dag)
	orderNodes := bindOrderNodes(mergedDag)
	collapseFastForwards(orderNodes, hashes, mergedDag, dag, mergedSeq)
	// Debugging aid: dumps the merged DAG in Graphviz format.
	/*fmt.Printf("digraph Hercules {\n")
	for i, c := range order {
		commit := hashes[c]
		fmt.Printf("  \"%s\"[label=\"[%d] %s\"]\n", commit.Hash.String(), i, commit.Hash.String()[:6])
		for _, child := range mergedDag[commit.Hash] {
			fmt.Printf("  \"%s\" -> \"%s\"\n", commit.Hash.String(), child.Hash.String())
		}
	}
	fmt.Printf("}\n")*/
	plan := generatePlan(orderNodes, numParents, hashes, mergedDag, dag, mergedSeq)
	plan = optimizePlan(plan)
	return plan
}
  604. // buildDag generates the raw commit DAG and the commit hash map.
  605. func buildDag(commits []*object.Commit) (
  606. map[string]*object.Commit, map[plumbing.Hash][]*object.Commit) {
  607. hashes := map[string]*object.Commit{}
  608. for _, commit := range commits {
  609. hashes[commit.Hash.String()] = commit
  610. }
  611. dag := map[plumbing.Hash][]*object.Commit{}
  612. for _, commit := range commits {
  613. if _, exists := dag[commit.Hash]; !exists {
  614. dag[commit.Hash] = make([]*object.Commit, 0, 1)
  615. }
  616. for _, parent := range commit.ParentHashes {
  617. if _, exists := hashes[parent.String()]; !exists {
  618. continue
  619. }
  620. children := dag[parent]
  621. if children == nil {
  622. children = make([]*object.Commit, 0, 1)
  623. }
  624. dag[parent] = append(children, commit)
  625. }
  626. }
  627. return hashes, dag
  628. }
  629. // bindNumParents returns curried "numParents" function.
  630. func bindNumParents(
  631. hashes map[string]*object.Commit,
  632. dag map[plumbing.Hash][]*object.Commit) func(c *object.Commit) int {
  633. return func(c *object.Commit) int {
  634. r := 0
  635. for _, parent := range c.ParentHashes {
  636. if p, exists := hashes[parent.String()]; exists {
  637. for _, pc := range dag[p.Hash] {
  638. if pc.Hash == c.Hash {
  639. r++
  640. break
  641. }
  642. }
  643. }
  644. }
  645. return r
  646. }
  647. }
// leaveRootComponent runs connected components analysis and throws away everything
// but the part which grows from the root.
func leaveRootComponent(
	hashes map[string]*object.Commit,
	dag map[plumbing.Hash][]*object.Commit) {
	visited := map[plumbing.Hash]bool{}
	var sets [][]plumbing.Hash
	// Collect the connected components by walking both children and (known)
	// parents from every unvisited node. Despite the name, "queue" is popped
	// from the tail, so this is a depth-first traversal - the order does not
	// affect component membership.
	for key := range dag {
		if visited[key] {
			continue
		}
		var set []plumbing.Hash
		for queue := []plumbing.Hash{key}; len(queue) > 0; {
			head := queue[len(queue)-1]
			queue = queue[:len(queue)-1]
			if visited[head] {
				continue
			}
			set = append(set, head)
			visited[head] = true
			for _, c := range dag[head] {
				if !visited[c.Hash] {
					queue = append(queue, c.Hash)
				}
			}
			if commit, exists := hashes[head.String()]; exists {
				for _, p := range commit.ParentHashes {
					if !visited[p] {
						// Only follow parents which belong to the analysed set.
						if _, exists := hashes[p.String()]; exists {
							queue = append(queue, p)
						}
					}
				}
			}
		}
		sets = append(sets, set)
	}
	// Keep only the largest component and drop the rest with a warning.
	// NOTE(review): the largest component is presumed to contain the root -
	// confirm for histories with very large disjoint subtrees.
	if len(sets) > 1 {
		maxlen := 0
		maxind := -1
		for i, set := range sets {
			if len(set) > maxlen {
				maxlen = len(set)
				maxind = i
			}
		}
		for i, set := range sets {
			if i == maxind {
				continue
			}
			for _, h := range set {
				log.Printf("warning: dropped %s from the analysis - disjoint", h.String())
				delete(dag, h)
				delete(hashes, h.String())
			}
		}
	}
}
  706. // bindOrderNodes returns curried "orderNodes" function.
  707. func bindOrderNodes(mergedDag map[plumbing.Hash][]*object.Commit) func(reverse bool) []string {
  708. return func(reverse bool) []string {
  709. graph := toposort.NewGraph()
  710. keys := make([]plumbing.Hash, 0, len(mergedDag))
  711. for key := range mergedDag {
  712. keys = append(keys, key)
  713. }
  714. sort.Slice(keys, func(i, j int) bool { return keys[i].String() < keys[j].String() })
  715. for _, key := range keys {
  716. graph.AddNode(key.String())
  717. }
  718. for _, key := range keys {
  719. children := mergedDag[key]
  720. sort.Slice(children, func(i, j int) bool {
  721. return children[i].Hash.String() < children[j].Hash.String()
  722. })
  723. for _, c := range children {
  724. graph.AddEdge(key.String(), c.Hash.String())
  725. }
  726. }
  727. order, ok := graph.Toposort()
  728. if !ok {
  729. // should never happen
  730. panic("Could not topologically sort the DAG of commits")
  731. }
  732. if reverse {
  733. // one day this must appear in the standard library...
  734. for i, j := 0, len(order)-1; i < len(order)/2; i, j = i+1, j-1 {
  735. order[i], order[j] = order[j], order[i]
  736. }
  737. }
  738. return order
  739. }
  740. }
// mergeDag turns sequences of consecutive commits into single nodes.
// mergedSeq maps a chain's head hash to the whole linear sequence of commits;
// mergedDag maps the head hash to the children of the chain's last commit.
func mergeDag(
	numParents func(c *object.Commit) int,
	hashes map[string]*object.Commit,
	dag map[plumbing.Hash][]*object.Commit) (
	mergedDag, mergedSeq map[plumbing.Hash][]*object.Commit) {
	// parentOf returns the sole in-set parent of c, or plumbing.ZeroHash when
	// c has zero or more than one known parents (i.e. it starts a chain).
	parentOf := func(c *object.Commit) plumbing.Hash {
		var parent plumbing.Hash
		for _, p := range c.ParentHashes {
			if _, exists := hashes[p.String()]; exists {
				if parent != plumbing.ZeroHash {
					// more than one parent
					return plumbing.ZeroHash
				}
				parent = p
			}
		}
		return parent
	}
	mergedDag = map[plumbing.Hash][]*object.Commit{}
	mergedSeq = map[plumbing.Hash][]*object.Commit{}
	visited := map[plumbing.Hash]bool{}
	for ch := range dag {
		c := hashes[ch.String()]
		if visited[c.Hash] {
			continue
		}
		// Walk backwards to the head of the linear chain this commit belongs to:
		// stop when the parent is missing, ambiguous, or itself branches.
		for true {
			parent := parentOf(c)
			if parent == plumbing.ZeroHash || len(dag[parent]) != 1 {
				break
			}
			c = hashes[parent.String()]
		}
		head := c
		var seq []*object.Commit
		children := dag[c.Hash]
		// Walk forwards, accumulating the chain until it branches (several
		// children) or the next commit is a merge (several parents).
		for true {
			visited[c.Hash] = true
			seq = append(seq, c)
			if len(children) != 1 {
				break
			}
			c = children[0]
			children = dag[c.Hash]
			if numParents(c) != 1 {
				break
			}
		}
		// Key the whole chain by its head commit.
		mergedSeq[head.Hash] = seq
		mergedDag[head.Hash] = dag[seq[len(seq)-1].Hash]
	}
	return
}
// collapseFastForwards removes the fast forward merges.
//
// A fast forward here is a node with exactly two children in mergedDag where
// one child is the sole grandchild through the other; the detour is folded
// into the parent's sequence. Walks the nodes in reverse topological order
// and mutates mergedDag, dag and mergedSeq in place.
func collapseFastForwards(
	orderNodes func(reverse bool) []string,
	hashes map[string]*object.Commit,
	mergedDag, dag, mergedSeq map[plumbing.Hash][]*object.Commit) {
	for _, strkey := range orderNodes(true) {
		key := hashes[strkey].Hash
		vals, exists := mergedDag[key]
		if !exists {
			// the node was already folded away by an earlier iteration
			continue
		}
		if len(vals) == 2 {
			// children of the two children, captured before any mutation below
			grand1 := mergedDag[vals[0].Hash]
			grand2 := mergedDag[vals[1].Hash]
			// vals[1] fast-forwards into vals[0]: splice both sequences
			// (vals[1] first) into key's and drop the two intermediate nodes
			if len(grand2) == 1 && vals[0].Hash == grand2[0].Hash {
				mergedDag[key] = mergedDag[vals[0].Hash]
				dag[key] = vals[1:]
				delete(mergedDag, vals[0].Hash)
				delete(mergedDag, vals[1].Hash)
				mergedSeq[key] = append(mergedSeq[key], mergedSeq[vals[1].Hash]...)
				mergedSeq[key] = append(mergedSeq[key], mergedSeq[vals[0].Hash]...)
				delete(mergedSeq, vals[0].Hash)
				delete(mergedSeq, vals[1].Hash)
			}
			// symmetric: vals[0] fast-forwards into vals[1]
			// NOTE(review): this branch reads entries the branch above deletes;
			// both conditions holding at once would require a 2-cycle between
			// vals[0] and vals[1], impossible in a DAG — confirm that input is
			// always acyclic here.
			if len(grand1) == 1 && vals[1].Hash == grand1[0].Hash {
				mergedDag[key] = mergedDag[vals[1].Hash]
				dag[key] = vals[:1]
				delete(mergedDag, vals[0].Hash)
				delete(mergedDag, vals[1].Hash)
				mergedSeq[key] = append(mergedSeq[key], mergedSeq[vals[0].Hash]...)
				mergedSeq[key] = append(mergedSeq[key], mergedSeq[vals[1].Hash]...)
				delete(mergedSeq, vals[0].Hash)
				delete(mergedSeq, vals[1].Hash)
			}
		}
	}
}
// generatePlan creates the list of actions from the commit DAG.
//
// It walks the merged DAG in topological order and emits runActionCommit,
// runActionMerge and runActionFork steps. `branches` maps a commit hash to
// the index of the logical branch that commit is processed on; `counter`
// hands out fresh branch indices at fork points.
func generatePlan(
	orderNodes func(reverse bool) []string,
	numParents func(c *object.Commit) int,
	hashes map[string]*object.Commit,
	mergedDag, dag, mergedSeq map[plumbing.Hash][]*object.Commit) []runAction {
	var plan []runAction
	branches := map[plumbing.Hash]int{}
	counter := 1
	for seqIndex, name := range orderNodes(false) {
		commit := hashes[name]
		if seqIndex == 0 {
			// the first node in topological order seeds branch 0
			branches[commit.Hash] = 0
		}
		// branch is the current node's branch index, -1 when unassigned
		var branch int
		{
			var exists bool
			branch, exists = branches[commit.Hash]
			if !exists {
				branch = -1
			}
		}
		branchExists := func() bool { return branch >= 0 }
		appendCommit := func(c *object.Commit, branch int) {
			plan = append(plan, runAction{
				Action: runActionCommit,
				Commit: c,
				Items: []int{branch},
			})
		}
		// appendMergeIfNeeded emits a merge step when `commit` has two or
		// more in-set parents, replaying the commit on every parent branch
		// other than the current one and then merging all parent branches.
		appendMergeIfNeeded := func() {
			if numParents(commit) < 2 {
				return
			}
			// merge after the merge commit (the first in the sequence)
			var items []int
			// sentinel larger than any real branch index
			// NOTE(review): `1 << 31` does not fit in a 32-bit int, so this
			// only compiles for 64-bit targets — confirm supported platforms.
			minBranch := 1 << 31
			for _, parent := range commit.ParentHashes {
				if _, exists := hashes[parent.String()]; exists {
					parentBranch := branches[parent]
					// only a parent with a single child may donate its branch
					if len(dag[parent]) == 1 && minBranch > parentBranch {
						minBranch = parentBranch
					}
					items = append(items, parentBranch)
					if parentBranch != branch {
						appendCommit(commit, parentBranch)
					}
				}
			}
			if minBranch < 1 << 31 {
				// continue on the smallest donated parent branch
				branch = minBranch
				branches[commit.Hash] = minBranch
			} else if !branchExists() {
				panic("!branchExists()")
			}
			plan = append(plan, runAction{
				Action: runActionMerge,
				Commit: nil,
				Items: items,
			})
		}
		if subseq, exists := mergedSeq[commit.Hash]; exists {
			for subseqIndex, offspring := range subseq {
				if branchExists() {
					appendCommit(offspring, branch)
				}
				if subseqIndex == 0 {
					// a merge, if any, happens right after the first commit
					appendMergeIfNeeded()
				}
			}
			// the tail of the sequence carries the branch forward
			branches[subseq[len(subseq)-1].Hash] = branch
		}
		if len(mergedDag[commit.Hash]) > 1 {
			// fork: the first child keeps the current branch, every other
			// child receives a newly allocated branch index
			branches[mergedDag[commit.Hash][0].Hash] = branch
			children := []int{branch}
			for i, child := range mergedDag[commit.Hash] {
				if i > 0 {
					branches[child.Hash] = counter
					children = append(children, counter)
					counter++
				}
			}
			plan = append(plan, runAction{
				Action: runActionFork,
				Commit: nil,
				Items: children,
			})
		}
	}
	return plan
}
// optimizePlan removes "dead" nodes and inserts `runActionDelete` disposal steps.
//
// | *
// * /
// |\/
// |/
// *
//
// A branch that receives exactly one commit is redundant: its commit and any
// fork/merge mention of it are dropped. Every surviving branch gets an
// explicit runActionDelete emitted right after the last plan step that
// mentions it.
func optimizePlan(plan []runAction) []runAction {
	// lives maps branch index to the number of commits in that branch
	lives := map[int]int{}
	// lastMentioned maps branch index to the index inside `plan` when that branch was last used
	lastMentioned := map[int]int{}
	for i, p := range plan {
		firstItem := p.Items[0]
		switch p.Action {
		case runActionCommit:
			lives[firstItem]++
			lastMentioned[firstItem] = i
		case runActionFork:
			lastMentioned[firstItem] = i
		case runActionMerge:
			for _, item := range p.Items {
				lastMentioned[item] = i
			}
		}
	}
	// branches with exactly one commit are redundant and scheduled for removal
	branchesToDelete := map[int]bool{}
	for key, life := range lives {
		if life == 1 {
			branchesToDelete[key] = true
			delete(lastMentioned, key)
		}
	}
	var optimizedPlan []runAction
	// pairs of (plan index of last mention, branch index); a branch whose
	// last mention is the final step needs no explicit deletion
	lastMentionedArr := make([][2]int, 0, len(lastMentioned) + 1)
	for key, val := range lastMentioned {
		if val != len(plan) - 1 {
			lastMentionedArr = append(lastMentionedArr, [2]int{val, key})
		}
	}
	if len(lastMentionedArr) == 0 && len(branchesToDelete) == 0 {
		// early return - we have nothing to optimize
		return plan
	}
	sort.Slice(lastMentionedArr, func(i, j int) bool {
		return lastMentionedArr[i][0] < lastMentionedArr[j][0]
	})
	// sentinel pair with branch -1 makes the loop below flush the plan tail
	lastMentionedArr = append(lastMentionedArr, [2]int{len(plan)-1, -1})
	prevpi := -1
	for _, pair := range lastMentionedArr {
		// copy (and filter) the plan steps up to this branch's last mention
		for pi := prevpi + 1; pi <= pair[0]; pi++ {
			p := plan[pi]
			switch p.Action {
			case runActionCommit:
				// skip commits that land on a redundant branch
				if !branchesToDelete[p.Items[0]] {
					optimizedPlan = append(optimizedPlan, p)
				}
			case runActionFork:
				// strip redundant branches; a fork with fewer than two
				// remaining branches is a no-op and is dropped entirely
				var newBranches []int
				for _, b := range p.Items {
					if !branchesToDelete[b] {
						newBranches = append(newBranches, b)
					}
				}
				if len(newBranches) > 1 {
					optimizedPlan = append(optimizedPlan, runAction{
						Action: runActionFork,
						Commit: p.Commit,
						Items: newBranches,
					})
				}
			case runActionMerge:
				// same filtering applies to merges
				var newBranches []int
				for _, b := range p.Items {
					if !branchesToDelete[b] {
						newBranches = append(newBranches, b)
					}
				}
				if len(newBranches) > 1 {
					optimizedPlan = append(optimizedPlan, runAction{
						Action: runActionMerge,
						Commit: p.Commit,
						Items: newBranches,
					})
				}
			}
		}
		if pair[1] >= 0 {
			// dispose of the branch now that it was mentioned for the last
			// time; the sentinel (-1) advances nothing and emits nothing
			prevpi = pair[0]
			optimizedPlan = append(optimizedPlan, runAction{
				Action: runActionDelete,
				Commit: nil,
				Items: []int{pair[1]},
			})
		}
	}
	// single commit can be detected as redundant
	if len(optimizedPlan) > 0 {
		return optimizedPlan
	}
	return plan
	// TODO(vmarkovtsev): there can be also duplicate redundant merges, e.g.
	/*
	0 4e34f03d829fbacb71cde0e010de87ea945dc69a [3]
	0 4e34f03d829fbacb71cde0e010de87ea945dc69a [12]
	2 [3 12]
	0 06716c2b39422938b77ddafa4d5c39bb9e4476da [3]
	0 06716c2b39422938b77ddafa4d5c39bb9e4476da [12]
	2 [3 12]
	0 1219c7bf9e0e1a93459a052ab8b351bfc379dc19 [12]
	*/
}