pipeline.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558
  1. package hercules
  2. import (
  3. "bufio"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "io/ioutil"
  8. "os"
  9. "path/filepath"
  10. "sort"
  11. "time"
  12. "gopkg.in/src-d/go-git.v4"
  13. "gopkg.in/src-d/go-git.v4/plumbing"
  14. "gopkg.in/src-d/go-git.v4/plumbing/object"
  15. "gopkg.in/src-d/hercules.v3/pb"
  16. "gopkg.in/src-d/hercules.v3/toposort"
  17. )
  18. type ConfigurationOptionType int
  19. const (
  20. // Boolean value type.
  21. BoolConfigurationOption ConfigurationOptionType = iota
  22. // Integer value type.
  23. IntConfigurationOption
  24. // String value type.
  25. StringConfigurationOption
  26. )
  27. func (opt ConfigurationOptionType) String() string {
  28. switch opt {
  29. case BoolConfigurationOption:
  30. return ""
  31. case IntConfigurationOption:
  32. return "int"
  33. case StringConfigurationOption:
  34. return "string"
  35. }
  36. panic(fmt.Sprintf("Invalid ConfigurationOptionType value %d", opt))
  37. }
  38. // ConfigurationOption allows for the unified, retrospective way to setup PipelineItem-s.
  39. type ConfigurationOption struct {
  40. // Name identifies the configuration option in facts.
  41. Name string
  42. // Description represents the help text about the configuration option.
  43. Description string
  44. // Flag corresponds to the CLI token with "-" prepended.
  45. Flag string
  46. // Type specifies the kind of the configuration option's value.
  47. Type ConfigurationOptionType
  48. // Default is the initial value of the configuration option.
  49. Default interface{}
  50. }
  51. func (opt ConfigurationOption) FormatDefault() string {
  52. if opt.Type != StringConfigurationOption {
  53. return fmt.Sprint(opt.Default)
  54. }
  55. return fmt.Sprintf("\"%s\"", opt.Default)
  56. }
// PipelineItem is the interface for all the units of the Git commit analysis pipeline.
type PipelineItem interface {
	// Name returns the name of the analysis.
	Name() string
	// Provides returns the list of keys of reusable calculated entities.
	// Other items may depend on them.
	Provides() []string
	// Requires returns the list of keys of needed entities which must be supplied in Consume().
	Requires() []string
	// ListConfigurationOptions returns the list of available options which can be consumed by Configure().
	ListConfigurationOptions() []ConfigurationOption
	// Configure performs the initial setup of the object by applying parameters from facts.
	// It allows to create PipelineItems in a universal way.
	Configure(facts map[string]interface{})
	// Initialize prepares and resets the item. Consume() requires Initialize()
	// to be called at least once beforehand.
	Initialize(*git.Repository)
	// Consume processes the next commit.
	// deps contains the required entities which match Requires(). Besides, it always includes
	// "commit" and "index".
	// Returns the calculated entities which match Provides().
	Consume(deps map[string]interface{}) (map[string]interface{}, error)
}
// FeaturedPipelineItem enables switching the automatic insertion of pipeline items on or off.
type FeaturedPipelineItem interface {
	PipelineItem
	// Features returns the list of names which enable this item to be automatically inserted
	// in Pipeline.DeployItem(). The item is skipped unless every listed
	// feature is activated in the pipeline.
	Features() []string
}
// LeafPipelineItem corresponds to the top level pipeline items which produce the end results.
type LeafPipelineItem interface {
	PipelineItem
	// Flag returns the cmdline name of the item.
	Flag() string
	// Finalize returns the result of the analysis.
	Finalize() interface{}
	// Serialize encodes the object returned by Finalize() to YAML or Protocol Buffers
	// (depending on the binary flag) and writes it to the supplied writer.
	Serialize(result interface{}, binary bool, writer io.Writer) error
}
// MergeablePipelineItem specifies the methods to combine several analysis results together.
type MergeablePipelineItem interface {
	LeafPipelineItem
	// Deserialize loads the result from Protocol Buffers blob.
	Deserialize(pbmessage []byte) (interface{}, error)
	// MergeResults joins two results together. Common-s are specified as the global state
	// (the CommonAnalysisResult which accompanies each serialized result).
	MergeResults(r1, r2 interface{}, c1, c2 *CommonAnalysisResult) interface{}
}
  105. // CommonAnalysisResult holds the information which is always extracted at Pipeline.Run().
  106. type CommonAnalysisResult struct {
  107. // Time of the first commit in the analysed sequence.
  108. BeginTime int64
  109. // Time of the last commit in the analysed sequence.
  110. EndTime int64
  111. // The number of commits in the analysed sequence.
  112. CommitsNumber int
  113. // The duration of Pipeline.Run().
  114. RunTime time.Duration
  115. }
  116. func (car *CommonAnalysisResult) BeginTimeAsTime() time.Time {
  117. return time.Unix(car.BeginTime, 0)
  118. }
  119. func (car *CommonAnalysisResult) EndTimeAsTime() time.Time {
  120. return time.Unix(car.EndTime, 0)
  121. }
  122. func (car *CommonAnalysisResult) Merge(other *CommonAnalysisResult) {
  123. if car.EndTime == 0 || other.BeginTime == 0 {
  124. panic("Merging with an uninitialized CommonAnalysisResult")
  125. }
  126. if other.BeginTime < car.BeginTime {
  127. car.BeginTime = other.BeginTime
  128. }
  129. if other.EndTime > car.EndTime {
  130. car.EndTime = other.EndTime
  131. }
  132. car.CommitsNumber += other.CommitsNumber
  133. car.RunTime += other.RunTime
  134. }
  135. func (car *CommonAnalysisResult) FillMetadata(meta *pb.Metadata) *pb.Metadata {
  136. meta.BeginUnixTime = car.BeginTime
  137. meta.EndUnixTime = car.EndTime
  138. meta.Commits = int32(car.CommitsNumber)
  139. meta.RunTime = car.RunTime.Nanoseconds() / 1e6
  140. return meta
  141. }
  142. func MetadataToCommonAnalysisResult(meta *pb.Metadata) *CommonAnalysisResult {
  143. return &CommonAnalysisResult{
  144. BeginTime: meta.BeginUnixTime,
  145. EndTime: meta.EndUnixTime,
  146. CommitsNumber: int(meta.Commits),
  147. RunTime: time.Duration(meta.RunTime * 1e6),
  148. }
  149. }
// Pipeline is the core Hercules entity: an ordered collection of
// PipelineItems which is executed over the commit history of a repository.
type Pipeline struct {
	// OnProgress is the callback which is invoked in Analyse() to output it's
	// progress. The first argument is the number of processed commits and the
	// second is the total number of commits.
	OnProgress func(int, int)
	// Repository points to the analysed Git repository struct from go-git.
	repository *git.Repository
	// Items are the registered building blocks in the pipeline. The order defines the
	// execution sequence.
	items []PipelineItem
	// The collection of parameters to create items.
	facts map[string]interface{}
	// Feature flags which enable the corresponding items.
	features map[string]bool
}

const (
	// ConfigPipelineDumpPath is the fact name for the path where resolve()
	// dumps the dependency DAG; empty means no dump.
	ConfigPipelineDumpPath = "Pipeline.DumpPath"
	// ConfigPipelineDryRun is the fact name which, when true, makes
	// Initialize() return right after the dependency resolution.
	ConfigPipelineDryRun = "Pipeline.DryRun"
	// FactPipelineCommits is the fact name which holds the sequence of
	// commits to analyse.
	FactPipelineCommits = "commits"
)
  170. func NewPipeline(repository *git.Repository) *Pipeline {
  171. return &Pipeline{
  172. repository: repository,
  173. items: []PipelineItem{},
  174. facts: map[string]interface{}{},
  175. features: map[string]bool{},
  176. }
  177. }
// GetFact returns the value of the fact with the specified name,
// or nil if it was never set.
func (pipeline *Pipeline) GetFact(name string) interface{} {
	return pipeline.facts[name]
}

// SetFact records the value of the fact with the specified name.
func (pipeline *Pipeline) SetFact(name string, value interface{}) {
	pipeline.facts[name] = value
}

// GetFeature returns the state of the feature with the specified name
// (the first return value) and whether the feature was ever set at all
// (the second return value).
func (pipeline *Pipeline) GetFeature(name string) (bool, bool) {
	val, exists := pipeline.features[name]
	return val, exists
}

// SetFeature enables the feature with the specified name.
func (pipeline *Pipeline) SetFeature(name string) {
	pipeline.features[name] = true
}
  191. func (pipeline *Pipeline) SetFeaturesFromFlags(registry ...*PipelineItemRegistry) {
  192. var ffr *PipelineItemRegistry
  193. if len(registry) == 0 {
  194. ffr = Registry
  195. } else if len(registry) == 1 {
  196. ffr = registry[0]
  197. } else {
  198. panic("Zero or one registry is allowed to be passed.")
  199. }
  200. for _, feature := range ffr.featureFlags.Flags {
  201. pipeline.SetFeature(feature)
  202. }
  203. }
// DeployItem adds the item to the pipeline together with all of its
// transitive dependencies. Features declared by the item itself are
// activated first, so its dependencies are not filtered out.
// Returns the same item to allow chaining.
func (pipeline *Pipeline) DeployItem(item PipelineItem) PipelineItem {
	fpi, ok := item.(FeaturedPipelineItem)
	if ok {
		// Turn on the features the deployed item itself requires.
		for _, f := range fpi.Features() {
			pipeline.SetFeature(f)
		}
	}
	queue := []PipelineItem{}
	queue = append(queue, item)
	added := map[string]PipelineItem{}
	// Index the already registered items so dependencies are not duplicated.
	for _, item := range pipeline.items {
		added[item.Name()] = item
	}
	added[item.Name()] = item
	pipeline.AddItem(item)
	// Breadth-first traversal over the Requires() edges.
	for len(queue) > 0 {
		head := queue[0]
		queue = queue[1:]
		for _, dep := range head.Requires() {
			// NOTE(review): dependencies are always summoned from the global
			// Registry, even if SetFeaturesFromFlags was given a custom
			// registry — confirm this is intentional.
			for _, sibling := range Registry.Summon(dep) {
				if _, exists := added[sibling.Name()]; !exists {
					disabled := false
					// If this item supports features, check them against the activated in pipeline.features
					if fpi, matches := sibling.(FeaturedPipelineItem); matches {
						for _, feature := range fpi.Features() {
							if !pipeline.features[feature] {
								disabled = true
								break
							}
						}
					}
					if disabled {
						continue
					}
					added[sibling.Name()] = sibling
					queue = append(queue, sibling)
					pipeline.AddItem(sibling)
				}
			}
		}
	}
	return item
}
// AddItem appends the item to the pipeline without resolving any
// dependencies. Returns the same item to allow chaining.
func (pipeline *Pipeline) AddItem(item PipelineItem) PipelineItem {
	pipeline.items = append(pipeline.items, item)
	return item
}

// RemoveItem deletes the first occurrence of the given item from the
// pipeline; it is a no-op if the item is not registered.
func (pipeline *Pipeline) RemoveItem(item PipelineItem) {
	for i, reg := range pipeline.items {
		if reg == item {
			pipeline.items = append(pipeline.items[:i], pipeline.items[i+1:]...)
			return
		}
	}
}

// Len returns the number of items registered in the pipeline.
func (pipeline *Pipeline) Len() int {
	return len(pipeline.items)
}
// Commits returns the critical path in the repository's history. It starts
// from HEAD and traces commits backwards till the root. When it encounters
// a merge (more than one parent), it always chooses the first parent.
// The returned slice is ordered from the root commit to HEAD.
// Panics when HEAD cannot be resolved or a commit cannot be loaded.
func (pipeline *Pipeline) Commits() []*object.Commit {
	result := []*object.Commit{}
	repository := pipeline.repository
	head, err := repository.Head()
	if err != nil {
		panic(err)
	}
	commit, err := repository.CommitObject(head.Hash())
	if err != nil {
		panic(err)
	}
	// the first parent matches the head
	// Parents().Next() yields the first parent; io.EOF signals the root.
	for ; err != io.EOF; commit, err = commit.Parents().Next() {
		if err != nil {
			panic(err)
		}
		result = append(result, commit)
	}
	// reverse the order: the walk collected HEAD-first, callers need root-first
	for i, j := 0, len(result)-1; i < j; i, j = i+1, j-1 {
		result[i], result[j] = result[j], result[i]
	}
	return result
}
// sortablePipelineItems implements sort.Interface, ordering items by Name().
// It is used by resolve() to make the dependency resolution deterministic.
type sortablePipelineItems []PipelineItem

func (items sortablePipelineItems) Len() int {
	return len(items)
}

func (items sortablePipelineItems) Less(i, j int) bool {
	return items[i].Name() < items[j].Name()
}

func (items sortablePipelineItems) Swap(i, j int) {
	items[i], items[j] = items[j], items[i]
}
  299. func (pipeline *Pipeline) resolve(dumpPath string) {
  300. graph := toposort.NewGraph()
  301. sort.Sort(sortablePipelineItems(pipeline.items))
  302. name2item := map[string]PipelineItem{}
  303. ambiguousMap := map[string][]string{}
  304. nameUsages := map[string]int{}
  305. for _, item := range pipeline.items {
  306. nameUsages[item.Name()]++
  307. }
  308. counters := map[string]int{}
  309. for _, item := range pipeline.items {
  310. name := item.Name()
  311. if nameUsages[name] > 1 {
  312. index := counters[item.Name()] + 1
  313. counters[item.Name()] = index
  314. name = fmt.Sprintf("%s_%d", item.Name(), index)
  315. }
  316. graph.AddNode(name)
  317. name2item[name] = item
  318. for _, key := range item.Provides() {
  319. key = "[" + key + "]"
  320. graph.AddNode(key)
  321. if graph.AddEdge(name, key) > 1 {
  322. if ambiguousMap[key] != nil {
  323. fmt.Fprintln(os.Stderr, "Pipeline:")
  324. for _, item2 := range pipeline.items {
  325. if item2 == item {
  326. fmt.Fprint(os.Stderr, "> ")
  327. }
  328. fmt.Fprint(os.Stderr, item2.Name(), " [")
  329. for i, key2 := range item2.Provides() {
  330. fmt.Fprint(os.Stderr, key2)
  331. if i < len(item.Provides())-1 {
  332. fmt.Fprint(os.Stderr, ", ")
  333. }
  334. }
  335. fmt.Fprintln(os.Stderr, "]")
  336. }
  337. panic("Failed to resolve pipeline dependencies: ambiguous graph.")
  338. }
  339. ambiguousMap[key] = graph.FindParents(key)
  340. }
  341. }
  342. }
  343. counters = map[string]int{}
  344. for _, item := range pipeline.items {
  345. name := item.Name()
  346. if nameUsages[name] > 1 {
  347. index := counters[item.Name()] + 1
  348. counters[item.Name()] = index
  349. name = fmt.Sprintf("%s_%d", item.Name(), index)
  350. }
  351. for _, key := range item.Requires() {
  352. key = "[" + key + "]"
  353. if graph.AddEdge(key, name) == 0 {
  354. panic(fmt.Sprintf("Unsatisfied dependency: %s -> %s", key, item.Name()))
  355. }
  356. }
  357. }
  358. if len(ambiguousMap) > 0 {
  359. ambiguous := []string{}
  360. for key := range ambiguousMap {
  361. ambiguous = append(ambiguous, key)
  362. }
  363. sort.Strings(ambiguous)
  364. bfsorder := graph.BreadthSort()
  365. bfsindex := map[string]int{}
  366. for i, s := range bfsorder {
  367. bfsindex[s] = i
  368. }
  369. for len(ambiguous) > 0 {
  370. key := ambiguous[0]
  371. ambiguous = ambiguous[1:]
  372. pair := ambiguousMap[key]
  373. inheritor := pair[1]
  374. if bfsindex[pair[1]] < bfsindex[pair[0]] {
  375. inheritor = pair[0]
  376. }
  377. removed := graph.RemoveEdge(key, inheritor)
  378. cycle := map[string]bool{}
  379. for _, node := range graph.FindCycle(key) {
  380. cycle[node] = true
  381. }
  382. if len(cycle) == 0 {
  383. cycle[inheritor] = true
  384. }
  385. if removed {
  386. graph.AddEdge(key, inheritor)
  387. }
  388. graph.RemoveEdge(inheritor, key)
  389. graph.ReindexNode(inheritor)
  390. // for all nodes key links to except those in cycle, put the link from inheritor
  391. for _, node := range graph.FindChildren(key) {
  392. if _, exists := cycle[node]; !exists {
  393. graph.AddEdge(inheritor, node)
  394. graph.RemoveEdge(key, node)
  395. }
  396. }
  397. graph.ReindexNode(key)
  398. }
  399. }
  400. var graphCopy *toposort.Graph
  401. if dumpPath != "" {
  402. graphCopy = graph.Copy()
  403. }
  404. strplan, ok := graph.Toposort()
  405. if !ok {
  406. panic("Failed to resolve pipeline dependencies: unable to topologically sort the items.")
  407. }
  408. pipeline.items = make([]PipelineItem, 0, len(pipeline.items))
  409. for _, key := range strplan {
  410. if item, ok := name2item[key]; ok {
  411. pipeline.items = append(pipeline.items, item)
  412. }
  413. }
  414. if dumpPath != "" {
  415. // If there is a floating difference, uncomment this:
  416. // fmt.Fprint(os.Stderr, graphCopy.DebugDump())
  417. ioutil.WriteFile(dumpPath, []byte(graphCopy.Serialize(strplan)), 0666)
  418. absPath, _ := filepath.Abs(dumpPath)
  419. fmt.Fprintf(os.Stderr, "Wrote the DAG to %s\n", absPath)
  420. }
  421. }
// Initialize prepares the pipeline for Run(): it fills in the default facts,
// topologically orders the items via resolve(), then calls Configure() and
// Initialize() on each item. When the dry-run fact is set, it stops right
// after the dependency resolution.
func (pipeline *Pipeline) Initialize(facts map[string]interface{}) {
	if facts == nil {
		facts = map[string]interface{}{}
	}
	// Provide the default commit sequence unless the caller supplied one.
	if _, exists := facts[FactPipelineCommits]; !exists {
		facts[FactPipelineCommits] = pipeline.Commits()
	}
	dumpPath, _ := facts[ConfigPipelineDumpPath].(string)
	pipeline.resolve(dumpPath)
	if dryRun, _ := facts[ConfigPipelineDryRun].(bool); dryRun {
		return
	}
	// All items are configured before any of them is initialized, so
	// Configure() of one item may still influence the facts seen by others.
	for _, item := range pipeline.items {
		item.Configure(facts)
	}
	for _, item := range pipeline.items {
		item.Initialize(pipeline.repository)
	}
}
  441. // Run method executes the pipeline.
  442. //
  443. // commits is a slice with the sequential commit history. It shall start from
  444. // the root (ascending order).
  445. //
  446. // Returns the mapping from each LeafPipelineItem to the corresponding analysis result.
  447. // There is always a "nil" record with CommonAnalysisResult.
  448. func (pipeline *Pipeline) Run(commits []*object.Commit) (map[LeafPipelineItem]interface{}, error) {
  449. startRunTime := time.Now()
  450. onProgress := pipeline.OnProgress
  451. if onProgress == nil {
  452. onProgress = func(int, int) {}
  453. }
  454. for index, commit := range commits {
  455. onProgress(index, len(commits))
  456. state := map[string]interface{}{"commit": commit, "index": index}
  457. for _, item := range pipeline.items {
  458. update, err := item.Consume(state)
  459. if err != nil {
  460. fmt.Fprintf(os.Stderr, "%s failed on commit #%d %s\n",
  461. item.Name(), index, commit.Hash.String())
  462. return nil, err
  463. }
  464. for _, key := range item.Provides() {
  465. val, ok := update[key]
  466. if !ok {
  467. panic(fmt.Sprintf("%s: Consume() did not return %s", item.Name(), key))
  468. }
  469. state[key] = val
  470. }
  471. }
  472. }
  473. onProgress(len(commits), len(commits))
  474. result := map[LeafPipelineItem]interface{}{}
  475. for _, item := range pipeline.items {
  476. if casted, ok := item.(LeafPipelineItem); ok {
  477. result[casted] = casted.Finalize()
  478. }
  479. }
  480. result[nil] = &CommonAnalysisResult{
  481. BeginTime: commits[0].Author.When.Unix(),
  482. EndTime: commits[len(commits)-1].Author.When.Unix(),
  483. CommitsNumber: len(commits),
  484. RunTime: time.Since(startRunTime),
  485. }
  486. return result, nil
  487. }
  488. func LoadCommitsFromFile(path string, repository *git.Repository) ([]*object.Commit, error) {
  489. var file io.ReadCloser
  490. if path != "-" {
  491. var err error
  492. file, err = os.Open(path)
  493. if err != nil {
  494. return nil, err
  495. }
  496. defer file.Close()
  497. } else {
  498. file = os.Stdin
  499. }
  500. scanner := bufio.NewScanner(file)
  501. commits := []*object.Commit{}
  502. for scanner.Scan() {
  503. hash := plumbing.NewHash(scanner.Text())
  504. if len(hash) != 20 {
  505. return nil, errors.New("invalid commit hash " + scanner.Text())
  506. }
  507. commit, err := repository.CommitObject(hash)
  508. if err != nil {
  509. return nil, err
  510. }
  511. commits = append(commits, commit)
  512. }
  513. return commits, nil
  514. }