pipeline.go 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357
  1. package hercules
  2. import (
  3. "bufio"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "io/ioutil"
  8. "os"
  9. "reflect"
  10. "gopkg.in/src-d/go-git.v4"
  11. "gopkg.in/src-d/go-git.v4/plumbing"
  12. "gopkg.in/src-d/go-git.v4/plumbing/object"
  13. "gopkg.in/src-d/hercules.v2/toposort"
  14. )
  15. type PipelineItem interface {
  16. // Name returns the name of the analysis.
  17. Name() string
  18. // Provides returns the list of keys of reusable calculated entities.
  19. // Other items may depend on them.
  20. Provides() []string
  21. // Requires returns the list of keys of needed entities which must be supplied in Consume().
  22. Requires() []string
  23. // Construct performs the initial creation of the object by taking parameters from facts.
  24. // It allows to create PipelineItems in a universal way.
  25. Construct(facts map[string]interface{})
  26. // Initialize prepares and resets the item. Consume() requires Initialize()
  27. // to be called at least once beforehand.
  28. Initialize(*git.Repository)
  29. // Consume processes the next commit.
  30. // deps contains the required entities which match Depends(). Besides, it always includes
  31. // "commit" and "index".
  32. // Returns the calculated entities which match Provides().
  33. Consume(deps map[string]interface{}) (map[string]interface{}, error)
  34. // Finalize returns the result of the analysis.
  35. Finalize() interface{}
  36. }
  37. type PipelineItemRegistry struct {
  38. provided map[string][]reflect.Type
  39. }
  40. func (registry *PipelineItemRegistry) Register(example PipelineItem) {
  41. if registry.provided == nil {
  42. registry.provided = map[string][]reflect.Type{}
  43. }
  44. t := reflect.TypeOf(example)
  45. for _, dep := range example.Provides() {
  46. ts := registry.provided[dep]
  47. if ts == nil {
  48. ts = []reflect.Type{}
  49. }
  50. ts = append(ts, t)
  51. registry.provided[dep] = ts
  52. }
  53. }
  54. func (registry *PipelineItemRegistry) Summon(provides string) []PipelineItem {
  55. if registry.provided == nil {
  56. return []PipelineItem{}
  57. }
  58. ts := registry.provided[provides]
  59. items := []PipelineItem{}
  60. for _, t := range ts {
  61. items = append(items, reflect.New(t.Elem()).Interface().(PipelineItem))
  62. }
  63. return items
  64. }
  65. var Registry = &PipelineItemRegistry{}
  66. type wrappedPipelineItem struct {
  67. Item PipelineItem
  68. Children []wrappedPipelineItem
  69. }
  70. type Pipeline struct {
  71. // OnProgress is the callback which is invoked in Analyse() to output it's
  72. // progress. The first argument is the number of processed commits and the
  73. // second is the total number of commits.
  74. OnProgress func(int, int)
  75. // repository points to the analysed Git repository struct from go-git.
  76. repository *git.Repository
  77. // items are the registered analysers in the pipeline.
  78. items []PipelineItem
  79. // plan is the resolved execution sequence.
  80. plan []PipelineItem
  81. // the collection of parameters to create items.
  82. facts map[string]interface{}
  83. }
  84. func NewPipeline(repository *git.Repository) *Pipeline {
  85. return &Pipeline{repository: repository, items: []PipelineItem{}, plan: []PipelineItem{}}
  86. }
  87. func (pipeline *Pipeline) GetFact(name string) interface{} {
  88. return pipeline.facts[name]
  89. }
  90. func (pipeline *Pipeline) SetFact(name string, value interface{}) {
  91. pipeline.facts[name] = value
  92. }
  93. func (pipeline *Pipeline) DeployItem(item PipelineItem) PipelineItem {
  94. queue := []PipelineItem{}
  95. queue = append(queue, item)
  96. added := map[string]PipelineItem{}
  97. added[item.Name()] = item
  98. pipeline.AddItem(item)
  99. for len(queue) > 0 {
  100. head := queue[0]
  101. queue = queue[1:]
  102. for _, dep := range head.Requires() {
  103. for _, sibling := range Registry.Summon(dep) {
  104. if _, exists := added[sibling.Name()]; !exists {
  105. added[sibling.Name()] = sibling
  106. queue = append(queue, sibling)
  107. pipeline.AddItem(sibling)
  108. }
  109. }
  110. }
  111. }
  112. return item
  113. }
  114. func (pipeline *Pipeline) AddItem(item PipelineItem) PipelineItem {
  115. pipeline.items = append(pipeline.items, item)
  116. return item
  117. }
  118. func (pipeline *Pipeline) RemoveItem(item PipelineItem) {
  119. for i, reg := range pipeline.items {
  120. if reg == item {
  121. pipeline.items = append(pipeline.items[:i], pipeline.items[i+1:]...)
  122. return
  123. }
  124. }
  125. }
  126. // Commits returns the critical path in the repository's history. It starts
  127. // from HEAD and traces commits backwards till the root. When it encounters
  128. // a merge (more than one parent), it always chooses the first parent.
  129. func (pipeline *Pipeline) Commits() []*object.Commit {
  130. result := []*object.Commit{}
  131. repository := pipeline.repository
  132. head, err := repository.Head()
  133. if err != nil {
  134. panic(err)
  135. }
  136. commit, err := repository.CommitObject(head.Hash())
  137. if err != nil {
  138. panic(err)
  139. }
  140. // the first parent matches the head
  141. for ; err != io.EOF; commit, err = commit.Parents().Next() {
  142. if err != nil {
  143. panic(err)
  144. }
  145. result = append(result, commit)
  146. }
  147. // reverse the order
  148. for i, j := 0, len(result)-1; i < j; i, j = i+1, j-1 {
  149. result[i], result[j] = result[j], result[i]
  150. }
  151. return result
  152. }
  153. func (pipeline *Pipeline) Initialize(facts map[string]interface{}) {
  154. graph := toposort.NewGraph()
  155. name2item := map[string]PipelineItem{}
  156. ambiguousMap := map[string][]string{}
  157. nameUsages := map[string]int{}
  158. for _, item := range pipeline.items {
  159. nameUsages[item.Name()]++
  160. }
  161. counters := map[string]int{}
  162. for _, item := range pipeline.items {
  163. name := item.Name()
  164. if nameUsages[name] > 1 {
  165. index := counters[item.Name()] + 1
  166. counters[item.Name()] = index
  167. name = fmt.Sprintf("%s_%d", item.Name(), index)
  168. }
  169. graph.AddNode(name)
  170. name2item[name] = item
  171. for _, key := range item.Provides() {
  172. key = "[" + key + "]"
  173. graph.AddNode(key)
  174. if graph.AddEdge(name, key) > 1 {
  175. if ambiguousMap[key] != nil {
  176. panic("Failed to resolve pipeline dependencies.")
  177. }
  178. ambiguousMap[key] = graph.FindParents(key)
  179. }
  180. }
  181. }
  182. counters = map[string]int{}
  183. for _, item := range pipeline.items {
  184. name := item.Name()
  185. if nameUsages[name] > 1 {
  186. index := counters[item.Name()] + 1
  187. counters[item.Name()] = index
  188. name = fmt.Sprintf("%s_%d", item.Name(), index)
  189. }
  190. for _, key := range item.Requires() {
  191. key = "[" + key + "]"
  192. if graph.AddEdge(key, name) == 0 {
  193. panic(fmt.Sprintf("Unsatisfied dependency: %s -> %s", key, item.Name()))
  194. }
  195. }
  196. }
  197. if len(ambiguousMap) > 0 {
  198. ambiguous := []string{}
  199. for key := range ambiguousMap {
  200. ambiguous = append(ambiguous, key)
  201. }
  202. bfsorder := graph.BreadthSort()
  203. bfsindex := map[string]int{}
  204. for i, s := range bfsorder {
  205. bfsindex[s] = i
  206. }
  207. for len(ambiguous) > 0 {
  208. key := ambiguous[0]
  209. ambiguous = ambiguous[1:]
  210. pair := ambiguousMap[key]
  211. inheritor := pair[1]
  212. if bfsindex[pair[1]] < bfsindex[pair[0]] {
  213. inheritor = pair[0]
  214. }
  215. removed := graph.RemoveEdge(key, inheritor)
  216. cycle := map[string]bool{}
  217. for _, node := range graph.FindCycle(key) {
  218. cycle[node] = true
  219. }
  220. if len(cycle) == 0 {
  221. cycle[inheritor] = true
  222. }
  223. if removed {
  224. graph.AddEdge(key, inheritor)
  225. }
  226. graph.RemoveEdge(inheritor, key)
  227. graph.ReindexNode(inheritor)
  228. // for all nodes key links to except those in cycle, put the link from inheritor
  229. for _, node := range graph.FindChildren(key) {
  230. if _, exists := cycle[node]; !exists {
  231. graph.AddEdge(inheritor, node)
  232. graph.RemoveEdge(key, node)
  233. }
  234. }
  235. graph.ReindexNode(key)
  236. }
  237. }
  238. if dumpPath, exists := facts["Pipeline.DumpPath"].(string); exists {
  239. ioutil.WriteFile(dumpPath, []byte(graph.Serialize([]string{})), 0666)
  240. }
  241. var graphCopy *toposort.Graph
  242. if _, exists := facts["Pipeline.DumpPath"].(string); exists {
  243. graphCopy = graph.Copy()
  244. }
  245. strplan, ok := graph.Toposort()
  246. if !ok {
  247. panic("Failed to resolve pipeline dependencies.")
  248. }
  249. for _, key := range strplan {
  250. item, ok := name2item[key]
  251. if ok {
  252. pipeline.plan = append(pipeline.plan, item)
  253. }
  254. }
  255. if len(pipeline.plan) != len(pipeline.items) {
  256. panic("Internal pipeline dependency resolution error.")
  257. }
  258. if dumpPath, exists := facts["Pipeline.DumpPath"].(string); exists {
  259. ioutil.WriteFile(dumpPath, []byte(graphCopy.Serialize(strplan)), 0666)
  260. }
  261. if dryRun, exists := facts["Pipeline.DryRun"].(bool); exists && dryRun {
  262. return
  263. }
  264. for _, item := range pipeline.items {
  265. item.Construct(facts)
  266. }
  267. for _, item := range pipeline.items {
  268. item.Initialize(pipeline.repository)
  269. }
  270. }
  271. // Run executes the pipeline.
  272. //
  273. // commits is a slice with the sequential commit history. It shall start from
  274. // the root (ascending order).
  275. func (pipeline *Pipeline) Run(commits []*object.Commit) (map[PipelineItem]interface{}, error) {
  276. onProgress := pipeline.OnProgress
  277. if onProgress == nil {
  278. onProgress = func(int, int) {}
  279. }
  280. for index, commit := range commits {
  281. onProgress(index, len(commits))
  282. state := map[string]interface{}{"commit": commit, "index": index}
  283. for _, item := range pipeline.plan {
  284. update, err := item.Consume(state)
  285. if err != nil {
  286. fmt.Fprintf(os.Stderr, "%s failed on commit #%d %s\n",
  287. item.Name(), index, commit.Hash.String())
  288. return nil, err
  289. }
  290. for _, key := range item.Provides() {
  291. val, ok := update[key]
  292. if !ok {
  293. panic(fmt.Sprintf("%s: Consume() did not return %s", item.Name(), key))
  294. }
  295. state[key] = val
  296. }
  297. }
  298. }
  299. onProgress(len(commits), len(commits))
  300. result := map[PipelineItem]interface{}{}
  301. for _, item := range pipeline.items {
  302. result[item] = item.Finalize()
  303. }
  304. return result, nil
  305. }
  306. func LoadCommitsFromFile(path string, repository *git.Repository) ([]*object.Commit, error) {
  307. var file io.ReadCloser
  308. if path != "-" {
  309. var err error
  310. file, err = os.Open(path)
  311. if err != nil {
  312. return nil, err
  313. }
  314. defer file.Close()
  315. } else {
  316. file = os.Stdin
  317. }
  318. scanner := bufio.NewScanner(file)
  319. commits := []*object.Commit{}
  320. for scanner.Scan() {
  321. hash := plumbing.NewHash(scanner.Text())
  322. if len(hash) != 20 {
  323. return nil, errors.New("invalid commit hash " + scanner.Text())
  324. }
  325. commit, err := repository.CommitObject(hash)
  326. if err != nil {
  327. return nil, err
  328. }
  329. commits = append(commits, commit)
  330. }
  331. return commits, nil
  332. }