pipeline.go 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583
  1. package hercules
  2. import (
  3. "bufio"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "io/ioutil"
  8. "os"
  9. "path/filepath"
  10. "sort"
  11. "time"
  12. "gopkg.in/src-d/go-git.v4"
  13. "gopkg.in/src-d/go-git.v4/plumbing"
  14. "gopkg.in/src-d/go-git.v4/plumbing/object"
  15. "gopkg.in/src-d/hercules.v3/pb"
  16. "gopkg.in/src-d/hercules.v3/toposort"
  17. )
  18. // ConfigurationOptionType represents the possible types of a ConfigurationOption's value.
  19. type ConfigurationOptionType int
  20. const (
  21. // BoolConfigurationOption reflects the boolean value type.
  22. BoolConfigurationOption ConfigurationOptionType = iota
  23. // IntConfigurationOption reflects the integer value type.
  24. IntConfigurationOption
  25. // StringConfigurationOption reflects the string value type.
  26. StringConfigurationOption
  27. )
  28. // String() returns an empty string for the boolean type, "int" for integers and "string" for
  29. // strings. It is used in the command line interface to show the argument's type.
  30. func (opt ConfigurationOptionType) String() string {
  31. switch opt {
  32. case BoolConfigurationOption:
  33. return ""
  34. case IntConfigurationOption:
  35. return "int"
  36. case StringConfigurationOption:
  37. return "string"
  38. }
  39. panic(fmt.Sprintf("Invalid ConfigurationOptionType value %d", opt))
  40. }
  41. // ConfigurationOption allows for the unified, retrospective way to setup PipelineItem-s.
  42. type ConfigurationOption struct {
  43. // Name identifies the configuration option in facts.
  44. Name string
  45. // Description represents the help text about the configuration option.
  46. Description string
  47. // Flag corresponds to the CLI token with "--" prepended.
  48. Flag string
  49. // Type specifies the kind of the configuration option's value.
  50. Type ConfigurationOptionType
  51. // Default is the initial value of the configuration option.
  52. Default interface{}
  53. }
  54. // FormatDefault() converts the default value of ConfigurationOption to string.
  55. // Used in the command line interface to show the argument's default value.
  56. func (opt ConfigurationOption) FormatDefault() string {
  57. if opt.Type != StringConfigurationOption {
  58. return fmt.Sprint(opt.Default)
  59. }
  60. return fmt.Sprintf("\"%s\"", opt.Default)
  61. }
  62. // PipelineItem is the interface for all the units in the Git commits analysis pipeline.
  63. type PipelineItem interface {
  64. // Name returns the name of the analysis.
  65. Name() string
  66. // Provides returns the list of keys of reusable calculated entities.
  67. // Other items may depend on them.
  68. Provides() []string
  69. // Requires returns the list of keys of needed entities which must be supplied in Consume().
  70. Requires() []string
  71. // ListConfigurationOptions returns the list of available options which can be consumed by Configure().
  72. ListConfigurationOptions() []ConfigurationOption
  73. // Configure performs the initial setup of the object by applying parameters from facts.
  74. // It allows to create PipelineItems in a universal way.
  75. Configure(facts map[string]interface{})
  76. // Initialize prepares and resets the item. Consume() requires Initialize()
  77. // to be called at least once beforehand.
  78. Initialize(*git.Repository)
  79. // Consume processes the next commit.
  80. // deps contains the required entities which match Depends(). Besides, it always includes
  81. // "commit" and "index".
  82. // Returns the calculated entities which match Provides().
  83. Consume(deps map[string]interface{}) (map[string]interface{}, error)
  84. }
  85. // FeaturedPipelineItem enables switching the automatic insertion of pipeline items on or off.
  86. type FeaturedPipelineItem interface {
  87. PipelineItem
  88. // Features returns the list of names which enable this item to be automatically inserted
  89. // in Pipeline.DeployItem().
  90. Features() []string
  91. }
  92. // LeafPipelineItem corresponds to the top level pipeline items which produce the end results.
  93. type LeafPipelineItem interface {
  94. PipelineItem
  95. // Flag returns the cmdline name of the item.
  96. Flag() string
  97. // Finalize returns the result of the analysis.
  98. Finalize() interface{}
  99. // Serialize encodes the object returned by Finalize() to YAML or Protocol Buffers.
  100. Serialize(result interface{}, binary bool, writer io.Writer) error
  101. }
  102. // MergeablePipelineItem specifies the methods to combine several analysis results together.
  103. type MergeablePipelineItem interface {
  104. LeafPipelineItem
  105. // Deserialize loads the result from Protocol Buffers blob.
  106. Deserialize(pbmessage []byte) (interface{}, error)
  107. // MergeResults joins two results together. Common-s are specified as the global state.
  108. MergeResults(r1, r2 interface{}, c1, c2 *CommonAnalysisResult) interface{}
  109. }
  110. // CommonAnalysisResult holds the information which is always extracted at Pipeline.Run().
  111. type CommonAnalysisResult struct {
  112. // Time of the first commit in the analysed sequence.
  113. BeginTime int64
  114. // Time of the last commit in the analysed sequence.
  115. EndTime int64
  116. // The number of commits in the analysed sequence.
  117. CommitsNumber int
  118. // The duration of Pipeline.Run().
  119. RunTime time.Duration
  120. }
  121. // BeginTimeAsTime() converts the UNIX timestamp of the beginning to Go time.
  122. func (car *CommonAnalysisResult) BeginTimeAsTime() time.Time {
  123. return time.Unix(car.BeginTime, 0)
  124. }
  125. // EndTimeAsTime() converts the UNIX timestamp of the ending to Go time.
  126. func (car *CommonAnalysisResult) EndTimeAsTime() time.Time {
  127. return time.Unix(car.EndTime, 0)
  128. }
  129. // Merge() combines the CommonAnalysisResult with an other one.
  130. // We choose the earlier BeginTime, the later EndTime, sum the number of commits and the
  131. // elapsed run times.
  132. func (car *CommonAnalysisResult) Merge(other *CommonAnalysisResult) {
  133. if car.EndTime == 0 || other.BeginTime == 0 {
  134. panic("Merging with an uninitialized CommonAnalysisResult")
  135. }
  136. if other.BeginTime < car.BeginTime {
  137. car.BeginTime = other.BeginTime
  138. }
  139. if other.EndTime > car.EndTime {
  140. car.EndTime = other.EndTime
  141. }
  142. car.CommitsNumber += other.CommitsNumber
  143. car.RunTime += other.RunTime
  144. }
  145. // FillMetadata() copies the data to a Protobuf message.
  146. func (car *CommonAnalysisResult) FillMetadata(meta *pb.Metadata) *pb.Metadata {
  147. meta.BeginUnixTime = car.BeginTime
  148. meta.EndUnixTime = car.EndTime
  149. meta.Commits = int32(car.CommitsNumber)
  150. meta.RunTime = car.RunTime.Nanoseconds() / 1e6
  151. return meta
  152. }
  153. // MetadataToCommonAnalysisResult copies the data from a Protobuf message.
  154. func MetadataToCommonAnalysisResult(meta *pb.Metadata) *CommonAnalysisResult {
  155. return &CommonAnalysisResult{
  156. BeginTime: meta.BeginUnixTime,
  157. EndTime: meta.EndUnixTime,
  158. CommitsNumber: int(meta.Commits),
  159. RunTime: time.Duration(meta.RunTime * 1e6),
  160. }
  161. }
  162. // Pipeline is the core Hercules entity which carries several PipelineItems and executes them.
  163. // See the extended example of how a Pipeline works in doc.go
  164. type Pipeline struct {
  165. // OnProgress is the callback which is invoked in Analyse() to output it's
  166. // progress. The first argument is the number of processed commits and the
  167. // second is the total number of commits.
  168. OnProgress func(int, int)
  169. // Repository points to the analysed Git repository struct from go-git.
  170. repository *git.Repository
  171. // Items are the registered building blocks in the pipeline. The order defines the
  172. // execution sequence.
  173. items []PipelineItem
  174. // The collection of parameters to create items.
  175. facts map[string]interface{}
  176. // Feature flags which enable the corresponding items.
  177. features map[string]bool
  178. }
  179. const (
  180. // ConfigPipelineDumpPath is the name of the Pipeline configuration option (Pipeline.Initialize())
  181. // which enables saving the items DAG to the specified file.
  182. ConfigPipelineDumpPath = "Pipeline.DumpPath"
  183. // ConfigPipelineDryRun is the name of the Pipeline configuration option (Pipeline.Initialize())
  184. // which disables Configure() and Initialize() invocation on each PipelineItem during the
  185. // Pipeline initialization.
  186. // Subsequent Run() calls are going to fail. Useful with ConfigPipelineDumpPath=true.
  187. ConfigPipelineDryRun = "Pipeline.DryRun"
  188. // ConfigPipelineCommits is the name of the Pipeline configuration option (Pipeline.Initialize())
  189. // which allows to specify the custom commit sequence. By default, Pipeline.Commits() is used.
  190. ConfigPipelineCommits = "commits"
  191. )
  192. // NewPipeline initializes a new instance of Pipeline struct.
  193. func NewPipeline(repository *git.Repository) *Pipeline {
  194. return &Pipeline{
  195. repository: repository,
  196. items: []PipelineItem{},
  197. facts: map[string]interface{}{},
  198. features: map[string]bool{},
  199. }
  200. }
  201. func (pipeline *Pipeline) GetFact(name string) interface{} {
  202. return pipeline.facts[name]
  203. }
  204. func (pipeline *Pipeline) SetFact(name string, value interface{}) {
  205. pipeline.facts[name] = value
  206. }
  207. func (pipeline *Pipeline) GetFeature(name string) (bool, bool) {
  208. val, exists := pipeline.features[name]
  209. return val, exists
  210. }
  211. func (pipeline *Pipeline) SetFeature(name string) {
  212. pipeline.features[name] = true
  213. }
  214. func (pipeline *Pipeline) SetFeaturesFromFlags(registry ...*PipelineItemRegistry) {
  215. var ffr *PipelineItemRegistry
  216. if len(registry) == 0 {
  217. ffr = Registry
  218. } else if len(registry) == 1 {
  219. ffr = registry[0]
  220. } else {
  221. panic("Zero or one registry is allowed to be passed.")
  222. }
  223. for _, feature := range ffr.featureFlags.Flags {
  224. pipeline.SetFeature(feature)
  225. }
  226. }
  227. func (pipeline *Pipeline) DeployItem(item PipelineItem) PipelineItem {
  228. fpi, ok := item.(FeaturedPipelineItem)
  229. if ok {
  230. for _, f := range fpi.Features() {
  231. pipeline.SetFeature(f)
  232. }
  233. }
  234. queue := []PipelineItem{}
  235. queue = append(queue, item)
  236. added := map[string]PipelineItem{}
  237. for _, item := range pipeline.items {
  238. added[item.Name()] = item
  239. }
  240. added[item.Name()] = item
  241. pipeline.AddItem(item)
  242. for len(queue) > 0 {
  243. head := queue[0]
  244. queue = queue[1:]
  245. for _, dep := range head.Requires() {
  246. for _, sibling := range Registry.Summon(dep) {
  247. if _, exists := added[sibling.Name()]; !exists {
  248. disabled := false
  249. // If this item supports features, check them against the activated in pipeline.features
  250. if fpi, matches := sibling.(FeaturedPipelineItem); matches {
  251. for _, feature := range fpi.Features() {
  252. if !pipeline.features[feature] {
  253. disabled = true
  254. break
  255. }
  256. }
  257. }
  258. if disabled {
  259. continue
  260. }
  261. added[sibling.Name()] = sibling
  262. queue = append(queue, sibling)
  263. pipeline.AddItem(sibling)
  264. }
  265. }
  266. }
  267. }
  268. return item
  269. }
  270. func (pipeline *Pipeline) AddItem(item PipelineItem) PipelineItem {
  271. pipeline.items = append(pipeline.items, item)
  272. return item
  273. }
  274. func (pipeline *Pipeline) RemoveItem(item PipelineItem) {
  275. for i, reg := range pipeline.items {
  276. if reg == item {
  277. pipeline.items = append(pipeline.items[:i], pipeline.items[i+1:]...)
  278. return
  279. }
  280. }
  281. }
  282. func (pipeline *Pipeline) Len() int {
  283. return len(pipeline.items)
  284. }
  285. // Commits returns the critical path in the repository's history. It starts
  286. // from HEAD and traces commits backwards till the root. When it encounters
  287. // a merge (more than one parent), it always chooses the first parent.
  288. func (pipeline *Pipeline) Commits() []*object.Commit {
  289. result := []*object.Commit{}
  290. repository := pipeline.repository
  291. head, err := repository.Head()
  292. if err != nil {
  293. panic(err)
  294. }
  295. commit, err := repository.CommitObject(head.Hash())
  296. if err != nil {
  297. panic(err)
  298. }
  299. // the first parent matches the head
  300. for ; err != io.EOF; commit, err = commit.Parents().Next() {
  301. if err != nil {
  302. panic(err)
  303. }
  304. result = append(result, commit)
  305. }
  306. // reverse the order
  307. for i, j := 0, len(result)-1; i < j; i, j = i+1, j-1 {
  308. result[i], result[j] = result[j], result[i]
  309. }
  310. return result
  311. }
  312. type sortablePipelineItems []PipelineItem
  313. func (items sortablePipelineItems) Len() int {
  314. return len(items)
  315. }
  316. func (items sortablePipelineItems) Less(i, j int) bool {
  317. return items[i].Name() < items[j].Name()
  318. }
  319. func (items sortablePipelineItems) Swap(i, j int) {
  320. items[i], items[j] = items[j], items[i]
  321. }
  322. func (pipeline *Pipeline) resolve(dumpPath string) {
  323. graph := toposort.NewGraph()
  324. sort.Sort(sortablePipelineItems(pipeline.items))
  325. name2item := map[string]PipelineItem{}
  326. ambiguousMap := map[string][]string{}
  327. nameUsages := map[string]int{}
  328. for _, item := range pipeline.items {
  329. nameUsages[item.Name()]++
  330. }
  331. counters := map[string]int{}
  332. for _, item := range pipeline.items {
  333. name := item.Name()
  334. if nameUsages[name] > 1 {
  335. index := counters[item.Name()] + 1
  336. counters[item.Name()] = index
  337. name = fmt.Sprintf("%s_%d", item.Name(), index)
  338. }
  339. graph.AddNode(name)
  340. name2item[name] = item
  341. for _, key := range item.Provides() {
  342. key = "[" + key + "]"
  343. graph.AddNode(key)
  344. if graph.AddEdge(name, key) > 1 {
  345. if ambiguousMap[key] != nil {
  346. fmt.Fprintln(os.Stderr, "Pipeline:")
  347. for _, item2 := range pipeline.items {
  348. if item2 == item {
  349. fmt.Fprint(os.Stderr, "> ")
  350. }
  351. fmt.Fprint(os.Stderr, item2.Name(), " [")
  352. for i, key2 := range item2.Provides() {
  353. fmt.Fprint(os.Stderr, key2)
  354. if i < len(item.Provides())-1 {
  355. fmt.Fprint(os.Stderr, ", ")
  356. }
  357. }
  358. fmt.Fprintln(os.Stderr, "]")
  359. }
  360. panic("Failed to resolve pipeline dependencies: ambiguous graph.")
  361. }
  362. ambiguousMap[key] = graph.FindParents(key)
  363. }
  364. }
  365. }
  366. counters = map[string]int{}
  367. for _, item := range pipeline.items {
  368. name := item.Name()
  369. if nameUsages[name] > 1 {
  370. index := counters[item.Name()] + 1
  371. counters[item.Name()] = index
  372. name = fmt.Sprintf("%s_%d", item.Name(), index)
  373. }
  374. for _, key := range item.Requires() {
  375. key = "[" + key + "]"
  376. if graph.AddEdge(key, name) == 0 {
  377. panic(fmt.Sprintf("Unsatisfied dependency: %s -> %s", key, item.Name()))
  378. }
  379. }
  380. }
  381. if len(ambiguousMap) > 0 {
  382. ambiguous := []string{}
  383. for key := range ambiguousMap {
  384. ambiguous = append(ambiguous, key)
  385. }
  386. sort.Strings(ambiguous)
  387. bfsorder := graph.BreadthSort()
  388. bfsindex := map[string]int{}
  389. for i, s := range bfsorder {
  390. bfsindex[s] = i
  391. }
  392. for len(ambiguous) > 0 {
  393. key := ambiguous[0]
  394. ambiguous = ambiguous[1:]
  395. pair := ambiguousMap[key]
  396. inheritor := pair[1]
  397. if bfsindex[pair[1]] < bfsindex[pair[0]] {
  398. inheritor = pair[0]
  399. }
  400. removed := graph.RemoveEdge(key, inheritor)
  401. cycle := map[string]bool{}
  402. for _, node := range graph.FindCycle(key) {
  403. cycle[node] = true
  404. }
  405. if len(cycle) == 0 {
  406. cycle[inheritor] = true
  407. }
  408. if removed {
  409. graph.AddEdge(key, inheritor)
  410. }
  411. graph.RemoveEdge(inheritor, key)
  412. graph.ReindexNode(inheritor)
  413. // for all nodes key links to except those in cycle, put the link from inheritor
  414. for _, node := range graph.FindChildren(key) {
  415. if _, exists := cycle[node]; !exists {
  416. graph.AddEdge(inheritor, node)
  417. graph.RemoveEdge(key, node)
  418. }
  419. }
  420. graph.ReindexNode(key)
  421. }
  422. }
  423. var graphCopy *toposort.Graph
  424. if dumpPath != "" {
  425. graphCopy = graph.Copy()
  426. }
  427. strplan, ok := graph.Toposort()
  428. if !ok {
  429. panic("Failed to resolve pipeline dependencies: unable to topologically sort the items.")
  430. }
  431. pipeline.items = make([]PipelineItem, 0, len(pipeline.items))
  432. for _, key := range strplan {
  433. if item, ok := name2item[key]; ok {
  434. pipeline.items = append(pipeline.items, item)
  435. }
  436. }
  437. if dumpPath != "" {
  438. // If there is a floating difference, uncomment this:
  439. // fmt.Fprint(os.Stderr, graphCopy.DebugDump())
  440. ioutil.WriteFile(dumpPath, []byte(graphCopy.Serialize(strplan)), 0666)
  441. absPath, _ := filepath.Abs(dumpPath)
  442. fmt.Fprintf(os.Stderr, "Wrote the DAG to %s\n", absPath)
  443. }
  444. }
  445. func (pipeline *Pipeline) Initialize(facts map[string]interface{}) {
  446. if facts == nil {
  447. facts = map[string]interface{}{}
  448. }
  449. if _, exists := facts[ConfigPipelineCommits]; !exists {
  450. facts[ConfigPipelineCommits] = pipeline.Commits()
  451. }
  452. dumpPath, _ := facts[ConfigPipelineDumpPath].(string)
  453. pipeline.resolve(dumpPath)
  454. if dryRun, _ := facts[ConfigPipelineDryRun].(bool); dryRun {
  455. return
  456. }
  457. for _, item := range pipeline.items {
  458. item.Configure(facts)
  459. }
  460. for _, item := range pipeline.items {
  461. item.Initialize(pipeline.repository)
  462. }
  463. }
  464. // Run method executes the pipeline.
  465. //
  466. // commits is a slice with the sequential commit history. It shall start from
  467. // the root (ascending order).
  468. //
  469. // Returns the mapping from each LeafPipelineItem to the corresponding analysis result.
  470. // There is always a "nil" record with CommonAnalysisResult.
  471. func (pipeline *Pipeline) Run(commits []*object.Commit) (map[LeafPipelineItem]interface{}, error) {
  472. startRunTime := time.Now()
  473. onProgress := pipeline.OnProgress
  474. if onProgress == nil {
  475. onProgress = func(int, int) {}
  476. }
  477. for index, commit := range commits {
  478. onProgress(index, len(commits))
  479. state := map[string]interface{}{"commit": commit, "index": index}
  480. for _, item := range pipeline.items {
  481. update, err := item.Consume(state)
  482. if err != nil {
  483. fmt.Fprintf(os.Stderr, "%s failed on commit #%d %s\n",
  484. item.Name(), index, commit.Hash.String())
  485. return nil, err
  486. }
  487. for _, key := range item.Provides() {
  488. val, ok := update[key]
  489. if !ok {
  490. panic(fmt.Sprintf("%s: Consume() did not return %s", item.Name(), key))
  491. }
  492. state[key] = val
  493. }
  494. }
  495. }
  496. onProgress(len(commits), len(commits))
  497. result := map[LeafPipelineItem]interface{}{}
  498. for _, item := range pipeline.items {
  499. if casted, ok := item.(LeafPipelineItem); ok {
  500. result[casted] = casted.Finalize()
  501. }
  502. }
  503. result[nil] = &CommonAnalysisResult{
  504. BeginTime: commits[0].Author.When.Unix(),
  505. EndTime: commits[len(commits)-1].Author.When.Unix(),
  506. CommitsNumber: len(commits),
  507. RunTime: time.Since(startRunTime),
  508. }
  509. return result, nil
  510. }
  511. // LoadCommitsFromFile reads the file by the specified FS path and generates the sequence of commits
  512. // by interpreting each line as a Git commit hash.
  513. func LoadCommitsFromFile(path string, repository *git.Repository) ([]*object.Commit, error) {
  514. var file io.ReadCloser
  515. if path != "-" {
  516. var err error
  517. file, err = os.Open(path)
  518. if err != nil {
  519. return nil, err
  520. }
  521. defer file.Close()
  522. } else {
  523. file = os.Stdin
  524. }
  525. scanner := bufio.NewScanner(file)
  526. commits := []*object.Commit{}
  527. for scanner.Scan() {
  528. hash := plumbing.NewHash(scanner.Text())
  529. if len(hash) != 20 {
  530. return nil, errors.New("invalid commit hash " + scanner.Text())
  531. }
  532. commit, err := repository.CommitObject(hash)
  533. if err != nil {
  534. return nil, err
  535. }
  536. commits = append(commits, commit)
  537. }
  538. return commits, nil
  539. }