- package core
- import (
- "bufio"
- "errors"
- "fmt"
- "io"
- "io/ioutil"
- "log"
- "os"
- "path/filepath"
- "sort"
- "strings"
- "time"
- "gopkg.in/src-d/go-git.v4"
- "gopkg.in/src-d/go-git.v4/plumbing"
- "gopkg.in/src-d/go-git.v4/plumbing/object"
- "gopkg.in/src-d/hercules.v4/internal/pb"
- "gopkg.in/src-d/hercules.v4/internal/toposort"
- )
- // ConfigurationOptionType represents the possible types of a ConfigurationOption's value.
- type ConfigurationOptionType int
- const (
- // BoolConfigurationOption reflects the boolean value type.
- BoolConfigurationOption ConfigurationOptionType = iota
- // IntConfigurationOption reflects the integer value type.
- IntConfigurationOption
- // StringConfigurationOption reflects the string value type.
- StringConfigurationOption
- // FloatConfigurationOption reflects a floating point value type.
- FloatConfigurationOption
- // StringsConfigurationOption reflects the array of strings value type.
- StringsConfigurationOption
- )
- // String returns an empty string for the boolean type, "int" for integers, "float" for
- // floating point values and "string" for the string kinds. It is used in the command line
- // interface to show the argument's type.
- func (opt ConfigurationOptionType) String() string {
- switch opt {
- case BoolConfigurationOption:
- return ""
- case IntConfigurationOption:
- return "int"
- case StringConfigurationOption:
- return "string"
- case FloatConfigurationOption:
- return "float"
- case StringsConfigurationOption:
- return "string"
- }
- panic(fmt.Sprintf("Invalid ConfigurationOptionType value %d", opt))
- }
- // ConfigurationOption allows for the unified, retrospective way to setup PipelineItem-s.
- type ConfigurationOption struct {
- // Name identifies the configuration option in facts.
- Name string
- // Description represents the help text about the configuration option.
- Description string
- // Flag corresponds to the CLI token with "--" prepended.
- Flag string
- // Type specifies the kind of the configuration option's value.
- Type ConfigurationOptionType
- // Default is the initial value of the configuration option.
- Default interface{}
- }
- // FormatDefault converts the default value of ConfigurationOption to string.
- // Used in the command line interface to show the argument's default value.
- func (opt ConfigurationOption) FormatDefault() string {
- if opt.Type == StringsConfigurationOption {
- return fmt.Sprintf("\"%s\"", strings.Join(opt.Default.([]string), ","))
- }
- if opt.Type != StringConfigurationOption {
- return fmt.Sprint(opt.Default)
- }
- return fmt.Sprintf("\"%s\"", opt.Default)
- }
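- // For illustration, a hypothetical option declaration as an item could return it from
- // ListConfigurationOptions(); the names below are made up and do not refer to a real analysis:
- //
- //   ConfigurationOption{
- //       Name:        "ExampleAnalysis.Granularity",
- //       Description: "How many days are in a single band.",
- //       Flag:        "granularity",
- //       Type:        IntConfigurationOption,
- //       Default:     30,
- //   }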
- // PipelineItem is the interface for all the units in the Git commits analysis pipeline.
- type PipelineItem interface {
- // Name returns the name of the analysis.
- Name() string
- // Provides returns the list of keys of reusable calculated entities.
- // Other items may depend on them.
- Provides() []string
- // Requires returns the list of keys of needed entities which must be supplied in Consume().
- Requires() []string
- // ListConfigurationOptions returns the list of available options which can be consumed by Configure().
- ListConfigurationOptions() []ConfigurationOption
- // Configure performs the initial setup of the object by applying parameters from facts.
- // It allows creating PipelineItems in a universal way.
- Configure(facts map[string]interface{})
- // Initialize prepares and resets the item. Consume() requires Initialize()
- // to be called at least once beforehand.
- Initialize(*git.Repository)
- // Consume processes the next commit.
- // deps contains the required entities which match Requires(). Besides, it always includes
- // DependencyCommit and DependencyIndex.
- // Returns the calculated entities which match Provides().
- Consume(deps map[string]interface{}) (map[string]interface{}, error)
- // Fork clones the item the requested number of times. The data links between the clones
- // are up to the implementation. Needed to handle Git branches. See also Merge().
- // Returns a slice with `n` fresh clones. In other words, it does not include the original item.
- Fork(n int) []PipelineItem
- // Merge combines several branches together. Each is supposed to have been created with Fork().
- // The result is stored in the called item, thus this function returns nothing.
- // Merge() must update all the branches, not only self. When several branches merge, some of
- // them may continue to live, hence this requirement.
- Merge(branches []PipelineItem)
- }
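- // A minimal sketch of an implementation which merely remembers the last processed commit,
- // assuming it only consumes the always-present DependencyCommit; the type name is
- // illustrative and not part of Hercules:
- //
- //   type LastCommitSniffer struct{ last plumbing.Hash }
- //
- //   func (l *LastCommitSniffer) Name() string       { return "LastCommitSniffer" }
- //   func (l *LastCommitSniffer) Provides() []string { return []string{} }
- //   func (l *LastCommitSniffer) Requires() []string { return []string{} }
- //   func (l *LastCommitSniffer) ListConfigurationOptions() []ConfigurationOption { return nil }
- //   func (l *LastCommitSniffer) Configure(facts map[string]interface{}) {}
- //   func (l *LastCommitSniffer) Initialize(repo *git.Repository)        { l.last = plumbing.ZeroHash }
- //
- //   func (l *LastCommitSniffer) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
- //       l.last = deps[DependencyCommit].(*object.Commit).Hash
- //       return map[string]interface{}{}, nil
- //   }
- //
- //   func (l *LastCommitSniffer) Fork(n int) []PipelineItem {
- //       clones := make([]PipelineItem, n)
- //       for i := range clones {
- //           clones[i] = &LastCommitSniffer{last: l.last}
- //       }
- //       return clones
- //   }
- //
- //   func (l *LastCommitSniffer) Merge(branches []PipelineItem) {}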
- // FeaturedPipelineItem enables switching the automatic insertion of pipeline items on or off.
- type FeaturedPipelineItem interface {
- PipelineItem
- // Features returns the list of names which enable this item to be automatically inserted
- // in Pipeline.DeployItem().
- Features() []string
- }
- // LeafPipelineItem corresponds to the top level pipeline items which produce the end results.
- type LeafPipelineItem interface {
- PipelineItem
- // Flag returns the cmdline name of the item.
- Flag() string
- // Finalize returns the result of the analysis.
- Finalize() interface{}
- // Serialize encodes the object returned by Finalize() to YAML or Protocol Buffers.
- Serialize(result interface{}, binary bool, writer io.Writer) error
- }
- // MergeablePipelineItem specifies the methods to combine several analysis results together.
- type MergeablePipelineItem interface {
- LeafPipelineItem
- // Deserialize loads the result from Protocol Buffers blob.
- Deserialize(pbmessage []byte) (interface{}, error)
- // MergeResults joins two results together. The CommonAnalysisResult-s carry the global state of each run.
- MergeResults(r1, r2 interface{}, c1, c2 *CommonAnalysisResult) interface{}
- }
- // CommonAnalysisResult holds the information which is always extracted at Pipeline.Run().
- type CommonAnalysisResult struct {
- // Time of the first commit in the analysed sequence.
- BeginTime int64
- // Time of the last commit in the analysed sequence.
- EndTime int64
- // The number of commits in the analysed sequence.
- CommitsNumber int
- // The duration of Pipeline.Run().
- RunTime time.Duration
- }
- // BeginTimeAsTime converts the UNIX timestamp of the beginning to Go time.
- func (car *CommonAnalysisResult) BeginTimeAsTime() time.Time {
- return time.Unix(car.BeginTime, 0)
- }
- // EndTimeAsTime converts the UNIX timestamp of the ending to Go time.
- func (car *CommonAnalysisResult) EndTimeAsTime() time.Time {
- return time.Unix(car.EndTime, 0)
- }
- // Merge combines the CommonAnalysisResult with another one.
- // We choose the earlier BeginTime, the later EndTime, sum the number of commits and the
- // elapsed run times.
- func (car *CommonAnalysisResult) Merge(other *CommonAnalysisResult) {
- if car.EndTime == 0 || other.BeginTime == 0 {
- panic("Merging with an uninitialized CommonAnalysisResult")
- }
- if other.BeginTime < car.BeginTime {
- car.BeginTime = other.BeginTime
- }
- if other.EndTime > car.EndTime {
- car.EndTime = other.EndTime
- }
- car.CommitsNumber += other.CommitsNumber
- car.RunTime += other.RunTime
- }
- // FillMetadata copies the data to a Protobuf message.
- func (car *CommonAnalysisResult) FillMetadata(meta *pb.Metadata) *pb.Metadata {
- meta.BeginUnixTime = car.BeginTime
- meta.EndUnixTime = car.EndTime
- meta.Commits = int32(car.CommitsNumber)
- meta.RunTime = car.RunTime.Nanoseconds() / 1e6
- return meta
- }
- // Metadata is defined in internal/pb/pb.pb.go - header of the binary file.
- type Metadata = pb.Metadata
- // MetadataToCommonAnalysisResult copies the data from a Protobuf message.
- func MetadataToCommonAnalysisResult(meta *Metadata) *CommonAnalysisResult {
- return &CommonAnalysisResult{
- BeginTime: meta.BeginUnixTime,
- EndTime: meta.EndUnixTime,
- CommitsNumber: int(meta.Commits),
- RunTime: time.Duration(meta.RunTime * 1e6),
- }
- }
- // Pipeline is the core Hercules entity which carries several PipelineItems and executes them.
- // See the extended example of how a Pipeline works in doc.go
- type Pipeline struct {
- // OnProgress is the callback which is invoked in Run() to report its
- // progress. The first argument is the number of completed steps and the
- // second is the total number of steps.
- OnProgress func(int, int)
- // Repository points to the analysed Git repository struct from go-git.
- repository *git.Repository
- // Items are the registered building blocks in the pipeline. The order defines the
- // execution sequence.
- items []PipelineItem
- // The collection of parameters to create items.
- facts map[string]interface{}
- // Feature flags which enable the corresponding items.
- features map[string]bool
- }
- const (
- // ConfigPipelineDumpPath is the name of the Pipeline configuration option (Pipeline.Initialize())
- // which enables saving the items DAG to the specified file.
- ConfigPipelineDumpPath = "Pipeline.DumpPath"
- // ConfigPipelineDryRun is the name of the Pipeline configuration option (Pipeline.Initialize())
- // which disables Configure() and Initialize() invocation on each PipelineItem during the
- // Pipeline initialization.
- // Subsequent Run() calls are going to fail. Useful in combination with ConfigPipelineDumpPath.
- ConfigPipelineDryRun = "Pipeline.DryRun"
- // ConfigPipelineCommits is the name of the Pipeline configuration option (Pipeline.Initialize())
- // which allows specifying a custom commit sequence. By default, Pipeline.Commits() is used.
- ConfigPipelineCommits = "commits"
- // DependencyCommit is the name of one of the two items in `deps` supplied to PipelineItem.Consume()
- // which always exists. It corresponds to the currently analyzed commit.
- DependencyCommit = "commit"
- // DependencyIndex is the name of one of the two items in `deps` supplied to PipelineItem.Consume()
- // which always exists. It corresponds to the currently analyzed commit's index.
- DependencyIndex = "index"
- )
- // NewPipeline initializes a new instance of Pipeline struct.
- func NewPipeline(repository *git.Repository) *Pipeline {
- return &Pipeline{
- repository: repository,
- items: []PipelineItem{},
- facts: map[string]interface{}{},
- features: map[string]bool{},
- }
- }
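- // A typical usage sketch; the leaf item name is hypothetical, any registered
- // LeafPipelineItem is wired the same way:
- //
- //   pipeline := NewPipeline(repository)
- //   leaf := pipeline.DeployItem(&SomeLeafAnalysis{}).(LeafPipelineItem)
- //   pipeline.Initialize(nil)
- //   results, err := pipeline.Run(pipeline.Commits())
- //   if err != nil {
- //       log.Fatal(err)
- //   }
- //   leaf.Serialize(results[leaf], false, os.Stdout)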
- // GetFact returns the value of the fact with the specified name.
- func (pipeline *Pipeline) GetFact(name string) interface{} {
- return pipeline.facts[name]
- }
- // SetFact sets the value of the fact with the specified name.
- func (pipeline *Pipeline) SetFact(name string, value interface{}) {
- pipeline.facts[name] = value
- }
- // GetFeature returns the state of the feature with the specified name (enabled/disabled) and
- // whether it exists. See also: FeaturedPipelineItem.
- func (pipeline *Pipeline) GetFeature(name string) (bool, bool) {
- val, exists := pipeline.features[name]
- return val, exists
- }
- // SetFeature sets the value of the feature with the specified name.
- // See also: FeaturedPipelineItem.
- func (pipeline *Pipeline) SetFeature(name string) {
- pipeline.features[name] = true
- }
- // SetFeaturesFromFlags enables the features which were specified through the command line flags
- // which belong to the given PipelineItemRegistry instance.
- // See also: AddItem().
- func (pipeline *Pipeline) SetFeaturesFromFlags(registry ...*PipelineItemRegistry) {
- var ffr *PipelineItemRegistry
- if len(registry) == 0 {
- ffr = Registry
- } else if len(registry) == 1 {
- ffr = registry[0]
- } else {
- panic("Zero or one registry is allowed to be passed.")
- }
- for _, feature := range ffr.featureFlags.Flags {
- pipeline.SetFeature(feature)
- }
- }
- // DeployItem inserts a PipelineItem into the pipeline. It also recursively creates all of its
- // dependencies (PipelineItem.Requires()). Returns the same item as specified in the arguments.
- func (pipeline *Pipeline) DeployItem(item PipelineItem) PipelineItem {
- fpi, ok := item.(FeaturedPipelineItem)
- if ok {
- for _, f := range fpi.Features() {
- pipeline.SetFeature(f)
- }
- }
- queue := []PipelineItem{}
- queue = append(queue, item)
- added := map[string]PipelineItem{}
- for _, item := range pipeline.items {
- added[item.Name()] = item
- }
- added[item.Name()] = item
- pipeline.AddItem(item)
- for len(queue) > 0 {
- head := queue[0]
- queue = queue[1:]
- for _, dep := range head.Requires() {
- for _, sibling := range Registry.Summon(dep) {
- if _, exists := added[sibling.Name()]; !exists {
- disabled := false
- // If this item supports features, check them against the activated in pipeline.features
- if fpi, matches := sibling.(FeaturedPipelineItem); matches {
- for _, feature := range fpi.Features() {
- if !pipeline.features[feature] {
- disabled = true
- break
- }
- }
- }
- if disabled {
- continue
- }
- added[sibling.Name()] = sibling
- queue = append(queue, sibling)
- pipeline.AddItem(sibling)
- }
- }
- }
- }
- return item
- }
- // AddItem inserts a PipelineItem into the pipeline. It does not check any dependencies.
- // See also: DeployItem().
- func (pipeline *Pipeline) AddItem(item PipelineItem) PipelineItem {
- pipeline.items = append(pipeline.items, item)
- return item
- }
- // RemoveItem deletes a PipelineItem from the pipeline. It leaves all the rest of the items intact.
- func (pipeline *Pipeline) RemoveItem(item PipelineItem) {
- for i, reg := range pipeline.items {
- if reg == item {
- pipeline.items = append(pipeline.items[:i], pipeline.items[i+1:]...)
- return
- }
- }
- }
- // Len returns the number of items in the pipeline.
- func (pipeline *Pipeline) Len() int {
- return len(pipeline.items)
- }
- // Commits returns the critical path in the repository's history. It starts
- // from HEAD and traces commits backwards till the root. When it encounters
- // a merge (more than one parent), it always chooses the first parent.
- func (pipeline *Pipeline) Commits() []*object.Commit {
- result := []*object.Commit{}
- repository := pipeline.repository
- head, err := repository.Head()
- if err != nil {
- panic(err)
- }
- commit, err := repository.CommitObject(head.Hash())
- if err != nil {
- panic(err)
- }
- // the first parent matches the head
- for ; err != io.EOF; commit, err = commit.Parents().Next() {
- if err != nil {
- panic(err)
- }
- result = append(result, commit)
- }
- // reverse the order
- for i, j := 0, len(result)-1; i < j; i, j = i+1, j-1 {
- result[i], result[j] = result[j], result[i]
- }
- return result
- }
- type sortablePipelineItems []PipelineItem
- func (items sortablePipelineItems) Len() int {
- return len(items)
- }
- func (items sortablePipelineItems) Less(i, j int) bool {
- return items[i].Name() < items[j].Name()
- }
- func (items sortablePipelineItems) Swap(i, j int) {
- items[i], items[j] = items[j], items[i]
- }
- func (pipeline *Pipeline) resolve(dumpPath string) {
- graph := toposort.NewGraph()
- sort.Sort(sortablePipelineItems(pipeline.items))
- name2item := map[string]PipelineItem{}
- ambiguousMap := map[string][]string{}
- nameUsages := map[string]int{}
- for _, item := range pipeline.items {
- nameUsages[item.Name()]++
- }
- counters := map[string]int{}
- for _, item := range pipeline.items {
- name := item.Name()
- if nameUsages[name] > 1 {
- index := counters[item.Name()] + 1
- counters[item.Name()] = index
- name = fmt.Sprintf("%s_%d", item.Name(), index)
- }
- graph.AddNode(name)
- name2item[name] = item
- for _, key := range item.Provides() {
- key = "[" + key + "]"
- graph.AddNode(key)
- if graph.AddEdge(name, key) > 1 {
- if ambiguousMap[key] != nil {
- fmt.Fprintln(os.Stderr, "Pipeline:")
- for _, item2 := range pipeline.items {
- if item2 == item {
- fmt.Fprint(os.Stderr, "> ")
- }
- fmt.Fprint(os.Stderr, item2.Name(), " [")
- for i, key2 := range item2.Provides() {
- fmt.Fprint(os.Stderr, key2)
- if i < len(item2.Provides())-1 {
- fmt.Fprint(os.Stderr, ", ")
- }
- }
- fmt.Fprintln(os.Stderr, "]")
- }
- panic("Failed to resolve pipeline dependencies: ambiguous graph.")
- }
- ambiguousMap[key] = graph.FindParents(key)
- }
- }
- }
- counters = map[string]int{}
- for _, item := range pipeline.items {
- name := item.Name()
- if nameUsages[name] > 1 {
- index := counters[item.Name()] + 1
- counters[item.Name()] = index
- name = fmt.Sprintf("%s_%d", item.Name(), index)
- }
- for _, key := range item.Requires() {
- key = "[" + key + "]"
- if graph.AddEdge(key, name) == 0 {
- panic(fmt.Sprintf("Unsatisfied dependency: %s -> %s", key, item.Name()))
- }
- }
- }
- // Try to break the cycles in some known scenarios.
- if len(ambiguousMap) > 0 {
- ambiguous := []string{}
- for key := range ambiguousMap {
- ambiguous = append(ambiguous, key)
- }
- sort.Strings(ambiguous)
- bfsorder := graph.BreadthSort()
- bfsindex := map[string]int{}
- for i, s := range bfsorder {
- bfsindex[s] = i
- }
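- // Of the two conflicting providers, the one visited later in the BFS becomes the
- // "inheritor": its providing edge to the ambiguous key is dropped and the key's
- // children outside of the detected cycle are re-attached to the inheritor.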
- for len(ambiguous) > 0 {
- key := ambiguous[0]
- ambiguous = ambiguous[1:]
- pair := ambiguousMap[key]
- inheritor := pair[1]
- if bfsindex[pair[1]] < bfsindex[pair[0]] {
- inheritor = pair[0]
- }
- removed := graph.RemoveEdge(key, inheritor)
- cycle := map[string]bool{}
- for _, node := range graph.FindCycle(key) {
- cycle[node] = true
- }
- if len(cycle) == 0 {
- cycle[inheritor] = true
- }
- if removed {
- graph.AddEdge(key, inheritor)
- }
- graph.RemoveEdge(inheritor, key)
- graph.ReindexNode(inheritor)
- // for all nodes key links to except those in cycle, put the link from inheritor
- for _, node := range graph.FindChildren(key) {
- if _, exists := cycle[node]; !exists {
- graph.AddEdge(inheritor, node)
- graph.RemoveEdge(key, node)
- }
- }
- graph.ReindexNode(key)
- }
- }
- var graphCopy *toposort.Graph
- if dumpPath != "" {
- graphCopy = graph.Copy()
- }
- strplan, ok := graph.Toposort()
- if !ok {
- panic("Failed to resolve pipeline dependencies: unable to topologically sort the items.")
- }
- pipeline.items = make([]PipelineItem, 0, len(pipeline.items))
- for _, key := range strplan {
- if item, ok := name2item[key]; ok {
- pipeline.items = append(pipeline.items, item)
- }
- }
- if dumpPath != "" {
- // If there is a floating difference, uncomment this:
- // fmt.Fprint(os.Stderr, graphCopy.DebugDump())
- err := ioutil.WriteFile(dumpPath, []byte(graphCopy.Serialize(strplan)), 0666)
- if err != nil {
- log.Printf("Failed to write the items DAG to %s: %v", dumpPath, err)
- }
- absPath, _ := filepath.Abs(dumpPath)
- log.Printf("Wrote the DAG to %s\n", absPath)
- }
- }
- // Initialize prepares the pipeline for the execution (Run()). This function
- // resolves the execution DAG, Configure()-s and Initialize()-s the items in it in the
- // topological dependency order. `facts` are passed inside Configure(). They are mutable.
- func (pipeline *Pipeline) Initialize(facts map[string]interface{}) {
- if facts == nil {
- facts = map[string]interface{}{}
- }
- if _, exists := facts[ConfigPipelineCommits]; !exists {
- facts[ConfigPipelineCommits] = pipeline.Commits()
- }
- dumpPath, _ := facts[ConfigPipelineDumpPath].(string)
- pipeline.resolve(dumpPath)
- if dryRun, _ := facts[ConfigPipelineDryRun].(bool); dryRun {
- return
- }
- for _, item := range pipeline.items {
- item.Configure(facts)
- }
- for _, item := range pipeline.items {
- item.Initialize(pipeline.repository)
- }
- }
- // Run method executes the pipeline.
- //
- // `commits` is a slice with the git commits to analyse. Multiple branches are supported.
- //
- // Returns the mapping from each LeafPipelineItem to the corresponding analysis result.
- // There is always a "nil" record with CommonAnalysisResult.
- func (pipeline *Pipeline) Run(commits []*object.Commit) (map[LeafPipelineItem]interface{}, error) {
- startRunTime := time.Now()
- onProgress := pipeline.OnProgress
- if onProgress == nil {
- onProgress = func(int, int) {}
- }
- plan := prepareRunPlan(commits)
- progressSteps := len(plan) + 2
- branches := map[int][]PipelineItem{0: pipeline.items}
- for index, step := range plan {
- onProgress(index + 1, progressSteps)
- firstItem := step.Items[0]
- switch step.Action {
- case runActionCommit:
- state := map[string]interface{}{
- DependencyCommit: step.Commit,
- DependencyIndex: index,
- }
- for _, item := range branches[firstItem] {
- update, err := item.Consume(state)
- if err != nil {
- log.Printf("%s failed on commit #%d %s\n",
- item.Name(), index + 1, step.Commit.Hash.String())
- return nil, err
- }
- for _, key := range item.Provides() {
- val, ok := update[key]
- if !ok {
- panic(fmt.Sprintf("%s: Consume() did not return %s", item.Name(), key))
- }
- state[key] = val
- }
- }
- case runActionFork:
- for i, clone := range cloneItems(branches[firstItem], len(step.Items)-1) {
- branches[step.Items[i+1]] = clone
- }
- case runActionMerge:
- merged := make([][]PipelineItem, len(step.Items))
- for i, b := range step.Items {
- merged[i] = branches[b]
- }
- mergeItems(merged)
- case runActionDelete:
- delete(branches, firstItem)
- }
- }
- onProgress(len(plan) + 1, progressSteps)
- result := map[LeafPipelineItem]interface{}{}
- for _, item := range getMasterBranch(branches) {
- if casted, ok := item.(LeafPipelineItem); ok {
- result[casted] = casted.Finalize()
- }
- }
- onProgress(progressSteps, progressSteps)
- result[nil] = &CommonAnalysisResult{
- BeginTime: commits[0].Author.When.Unix(),
- EndTime: commits[len(commits)-1].Author.When.Unix(),
- CommitsNumber: len(commits),
- RunTime: time.Since(startRunTime),
- }
- return result, nil
- }
- // LoadCommitsFromFile reads the file at the specified FS path and generates the sequence of commits
- // by interpreting each line as a Git commit hash.
- func LoadCommitsFromFile(path string, repository *git.Repository) ([]*object.Commit, error) {
- var file io.ReadCloser
- if path != "-" {
- var err error
- file, err = os.Open(path)
- if err != nil {
- return nil, err
- }
- defer file.Close()
- } else {
- file = os.Stdin
- }
- scanner := bufio.NewScanner(file)
- var commits []*object.Commit
- for scanner.Scan() {
- hash := plumbing.NewHash(scanner.Text())
- if hash.IsZero() {
- return nil, errors.New("invalid commit hash " + scanner.Text())
- }
- commit, err := repository.CommitObject(hash)
- if err != nil {
- return nil, err
- }
- commits = append(commits, commit)
- }
- return commits, nil
- }
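- // Example sketch (the path is hypothetical): feed a hand-picked commit list to Run()
- // instead of the default Pipeline.Commits() sequence, assuming an already initialized pipeline:
- //
- //   commits, err := LoadCommitsFromFile("commits.txt", repository)
- //   if err != nil {
- //       log.Fatal(err)
- //   }
- //   results, err := pipeline.Run(commits)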
- const (
- // runActionCommit corresponds to a regular commit
- runActionCommit = iota
- // runActionFork splits a branch into several parts
- runActionFork
- // runActionMerge merges several branches together
- runActionMerge
- // runActionDelete removes the branch as it is no longer needed
- runActionDelete
- )
- type runAction struct {
- Action int
- Commit *object.Commit
- Items []int
- }
- func cloneItems(origin []PipelineItem, n int) [][]PipelineItem {
- clones := make([][]PipelineItem, n)
- for j := 0; j < n; j++ {
- clones[j] = make([]PipelineItem, len(origin))
- }
- for i, item := range origin {
- itemClones := item.Fork(n)
- for j := 0; j < n; j++ {
- clones[j][i] = itemClones[j]
- }
- }
- return clones
- }
- func mergeItems(branches [][]PipelineItem) {
- buffer := make([]PipelineItem, len(branches) - 1)
- for i, item := range branches[0] {
- for j := 0; j < len(branches)-1; j++ {
- buffer[j] = branches[j+1][i]
- }
- item.Merge(buffer)
- }
- }
- // getMasterBranch returns the branch with the smallest index.
- func getMasterBranch(branches map[int][]PipelineItem) []PipelineItem {
- minKey := 1 << 31
- var minVal []PipelineItem
- for key, val := range branches {
- if key < minKey {
- minKey = key
- minVal = val
- }
- }
- return minVal
- }
- // prepareRunPlan schedules the actions for Pipeline.Run().
- func prepareRunPlan(commits []*object.Commit) []runAction {
- hashes, dag := buildDag(commits)
- leaveRootComponent(hashes, dag)
- numParents := bindNumParents(hashes, dag)
- mergedDag, mergedSeq := mergeDag(numParents, hashes, dag)
- orderNodes := bindOrderNodes(mergedDag)
- collapseFastForwards(orderNodes, hashes, mergedDag, dag, mergedSeq)
- /*fmt.Printf("digraph Hercules {\n")
- for i, c := range order {
- commit := hashes[c]
- fmt.Printf(" \"%s\"[label=\"[%d] %s\"]\n", commit.Hash.String(), i, commit.Hash.String()[:6])
- for _, child := range mergedDag[commit.Hash] {
- fmt.Printf(" \"%s\" -> \"%s\"\n", commit.Hash.String(), child.Hash.String())
- }
- }
- fmt.Printf("}\n")*/
- plan := generatePlan(orderNodes, numParents, hashes, mergedDag, dag, mergedSeq)
- plan = optimizePlan(plan)
- return plan
- }
- // buildDag generates the raw commit DAG and the commit hash map.
- func buildDag(commits []*object.Commit) (
- map[string]*object.Commit, map[plumbing.Hash][]*object.Commit) {
- hashes := map[string]*object.Commit{}
- for _, commit := range commits {
- hashes[commit.Hash.String()] = commit
- }
- dag := map[plumbing.Hash][]*object.Commit{}
- for _, commit := range commits {
- if _, exists := dag[commit.Hash]; !exists {
- dag[commit.Hash] = make([]*object.Commit, 0, 1)
- }
- for _, parent := range commit.ParentHashes {
- if _, exists := hashes[parent.String()]; !exists {
- continue
- }
- children := dag[parent]
- if children == nil {
- children = make([]*object.Commit, 0, 1)
- }
- dag[parent] = append(children, commit)
- }
- }
- return hashes, dag
- }
- // bindNumParents returns the curried "numParents" function.
- func bindNumParents(
- hashes map[string]*object.Commit,
- dag map[plumbing.Hash][]*object.Commit) func(c *object.Commit) int {
- return func(c *object.Commit) int {
- r := 0
- for _, parent := range c.ParentHashes {
- if p, exists := hashes[parent.String()]; exists {
- for _, pc := range dag[p.Hash] {
- if pc.Hash == c.Hash {
- r++
- break
- }
- }
- }
- }
- return r
- }
- }
- // leaveRootComponent runs connected components analysis and throws away everything
- // but the part which grows from the root.
- func leaveRootComponent(
- hashes map[string]*object.Commit,
- dag map[plumbing.Hash][]*object.Commit) {
- visited := map[plumbing.Hash]bool{}
- var sets [][]plumbing.Hash
- for key := range dag {
- if visited[key] {
- continue
- }
- var set []plumbing.Hash
- for queue := []plumbing.Hash{key}; len(queue) > 0; {
- head := queue[len(queue)-1]
- queue = queue[:len(queue)-1]
- if visited[head] {
- continue
- }
- set = append(set, head)
- visited[head] = true
- for _, c := range dag[head] {
- if !visited[c.Hash] {
- queue = append(queue, c.Hash)
- }
- }
- if commit, exists := hashes[head.String()]; exists {
- for _, p := range commit.ParentHashes {
- if !visited[p] {
- if _, exists := hashes[p.String()]; exists {
- queue = append(queue, p)
- }
- }
- }
- }
- }
- sets = append(sets, set)
- }
- if len(sets) > 1 {
- maxlen := 0
- maxind := -1
- for i, set := range sets {
- if len(set) > maxlen {
- maxlen = len(set)
- maxind = i
- }
- }
- for i, set := range sets {
- if i == maxind {
- continue
- }
- for _, h := range set {
- log.Printf("warning: dropped %s from the analysis - disjoint", h.String())
- delete(dag, h)
- delete(hashes, h.String())
- }
- }
- }
- }
- // bindOrderNodes returns the curried "orderNodes" function.
- func bindOrderNodes(mergedDag map[plumbing.Hash][]*object.Commit) func(reverse bool) []string {
- return func(reverse bool) []string {
- graph := toposort.NewGraph()
- keys := make([]plumbing.Hash, 0, len(mergedDag))
- for key := range mergedDag {
- keys = append(keys, key)
- }
- sort.Slice(keys, func(i, j int) bool { return keys[i].String() < keys[j].String() })
- for _, key := range keys {
- graph.AddNode(key.String())
- }
- for _, key := range keys {
- children := mergedDag[key]
- sort.Slice(children, func(i, j int) bool {
- return children[i].Hash.String() < children[j].Hash.String()
- })
- for _, c := range children {
- graph.AddEdge(key.String(), c.Hash.String())
- }
- }
- order, ok := graph.Toposort()
- if !ok {
- // should never happen
- panic("Could not topologically sort the DAG of commits")
- }
- if reverse {
- // one day this must appear in the standard library...
- for i, j := 0, len(order)-1; i < len(order)/2; i, j = i+1, j-1 {
- order[i], order[j] = order[j], order[i]
- }
- }
- return order
- }
- }
- // mergeDag turns sequences of consecutive commits into single nodes.
- func mergeDag(
- numParents func(c *object.Commit) int,
- hashes map[string]*object.Commit,
- dag map[plumbing.Hash][]*object.Commit) (
- mergedDag, mergedSeq map[plumbing.Hash][]*object.Commit) {
- parentOf := func(c *object.Commit) plumbing.Hash {
- var parent plumbing.Hash
- for _, p := range c.ParentHashes {
- if _, exists := hashes[p.String()]; exists {
- if parent != plumbing.ZeroHash {
- // more than one parent
- return plumbing.ZeroHash
- }
- parent = p
- }
- }
- return parent
- }
- mergedDag = map[plumbing.Hash][]*object.Commit{}
- mergedSeq = map[plumbing.Hash][]*object.Commit{}
- visited := map[plumbing.Hash]bool{}
- for ch := range dag {
- c := hashes[ch.String()]
- if visited[c.Hash] {
- continue
- }
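- // climb up the chain of ancestors: stop at a root, at a merge, or when the
- // parent has more than one child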
- for {
- parent := parentOf(c)
- if parent == plumbing.ZeroHash || len(dag[parent]) != 1 {
- break
- }
- c = hashes[parent.String()]
- }
- head := c
- var seq []*object.Commit
- children := dag[c.Hash]
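- // descend from the head, collecting the linear run of commits until the chain
- // forks, ends, or runs into a merge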
- for {
- visited[c.Hash] = true
- seq = append(seq, c)
- if len(children) != 1 {
- break
- }
- c = children[0]
- children = dag[c.Hash]
- if numParents(c) != 1 {
- break
- }
- }
- mergedSeq[head.Hash] = seq
- mergedDag[head.Hash] = dag[seq[len(seq)-1].Hash]
- }
- return
- }
- // collapseFastForwards removes the fast-forward merges from the commit DAG.
- func collapseFastForwards(
- orderNodes func(reverse bool) []string,
- hashes map[string]*object.Commit,
- mergedDag, dag, mergedSeq map[plumbing.Hash][]*object.Commit) {
- for _, strkey := range orderNodes(true) {
- key := hashes[strkey].Hash
- vals, exists := mergedDag[key]
- if !exists {
- continue
- }
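- // a fast-forward appears as a node with two children where one child is the
- // only child of the other; collapse such a triangle into the parent node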
- if len(vals) == 2 {
- grand1 := mergedDag[vals[0].Hash]
- grand2 := mergedDag[vals[1].Hash]
- if len(grand2) == 1 && vals[0].Hash == grand2[0].Hash {
- mergedDag[key] = mergedDag[vals[0].Hash]
- dag[key] = vals[1:]
- delete(mergedDag, vals[0].Hash)
- delete(mergedDag, vals[1].Hash)
- mergedSeq[key] = append(mergedSeq[key], mergedSeq[vals[1].Hash]...)
- mergedSeq[key] = append(mergedSeq[key], mergedSeq[vals[0].Hash]...)
- delete(mergedSeq, vals[0].Hash)
- delete(mergedSeq, vals[1].Hash)
- }
- // symmetric
- if len(grand1) == 1 && vals[1].Hash == grand1[0].Hash {
- mergedDag[key] = mergedDag[vals[1].Hash]
- dag[key] = vals[:1]
- delete(mergedDag, vals[0].Hash)
- delete(mergedDag, vals[1].Hash)
- mergedSeq[key] = append(mergedSeq[key], mergedSeq[vals[0].Hash]...)
- mergedSeq[key] = append(mergedSeq[key], mergedSeq[vals[1].Hash]...)
- delete(mergedSeq, vals[0].Hash)
- delete(mergedSeq, vals[1].Hash)
- }
- }
- }
- }
- // generatePlan creates the list of actions from the commit DAG.
- func generatePlan(
- orderNodes func(reverse bool) []string,
- numParents func(c *object.Commit) int,
- hashes map[string]*object.Commit,
- mergedDag, dag, mergedSeq map[plumbing.Hash][]*object.Commit) []runAction {
- var plan []runAction
- branches := map[plumbing.Hash]int{}
- counter := 1
- for seqIndex, name := range orderNodes(false) {
- commit := hashes[name]
- if seqIndex == 0 {
- branches[commit.Hash] = 0
- }
- var branch int
- {
- var exists bool
- branch, exists = branches[commit.Hash]
- if !exists {
- branch = -1
- }
- }
- branchExists := func() bool { return branch >= 0 }
- appendCommit := func(c *object.Commit, branch int) {
- plan = append(plan, runAction{
- Action: runActionCommit,
- Commit: c,
- Items: []int{branch},
- })
- }
- appendMergeIfNeeded := func() {
- if numParents(commit) < 2 {
- return
- }
- // merge after the merge commit (the first in the sequence)
- var items []int
- minBranch := 1 << 31
- for _, parent := range commit.ParentHashes {
- if _, exists := hashes[parent.String()]; exists {
- parentBranch := branches[parent]
- if len(dag[parent]) == 1 && minBranch > parentBranch {
- minBranch = parentBranch
- }
- items = append(items, parentBranch)
- if parentBranch != branch {
- appendCommit(commit, parentBranch)
- }
- }
- }
- if minBranch < 1 << 31 {
- branch = minBranch
- branches[commit.Hash] = minBranch
- } else if !branchExists() {
- panic("!branchExists()")
- }
- plan = append(plan, runAction{
- Action: runActionMerge,
- Commit: nil,
- Items: items,
- })
- }
- if subseq, exists := mergedSeq[commit.Hash]; exists {
- for subseqIndex, offspring := range subseq {
- if branchExists() {
- appendCommit(offspring, branch)
- }
- if subseqIndex == 0 {
- appendMergeIfNeeded()
- }
- }
- branches[subseq[len(subseq)-1].Hash] = branch
- }
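- // more than one child in the merged DAG: the first child continues on the
- // current branch, the rest are forked into fresh branch indexes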
- if len(mergedDag[commit.Hash]) > 1 {
- branches[mergedDag[commit.Hash][0].Hash] = branch
- children := []int{branch}
- for i, child := range mergedDag[commit.Hash] {
- if i > 0 {
- branches[child.Hash] = counter
- children = append(children, counter)
- counter++
- }
- }
- plan = append(plan, runAction{
- Action: runActionFork,
- Commit: nil,
- Items: children,
- })
- }
- }
- return plan
- }
- // optimizePlan removes "dead" nodes and inserts `runActionDelete` disposal steps.
- //
- // | *
- // * /
- // |\/
- // |/
- // *
- //
- func optimizePlan(plan []runAction) []runAction {
- // lives maps branch index to the number of commits in that branch
- lives := map[int]int{}
- // lastMentioned maps branch index to the index inside `plan` when that branch was last used
- lastMentioned := map[int]int{}
- for i, p := range plan {
- firstItem := p.Items[0]
- switch p.Action {
- case runActionCommit:
- lives[firstItem]++
- lastMentioned[firstItem] = i
- case runActionFork:
- lastMentioned[firstItem] = i
- case runActionMerge:
- for _, item := range p.Items {
- lastMentioned[item] = i
- }
- }
- }
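- // branches which host only a single commit are deemed redundant and dropped from the plan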
- branchesToDelete := map[int]bool{}
- for key, life := range lives {
- if life == 1 {
- branchesToDelete[key] = true
- delete(lastMentioned, key)
- }
- }
- var optimizedPlan []runAction
- lastMentionedArr := make([][2]int, 0, len(lastMentioned) + 1)
- for key, val := range lastMentioned {
- if val != len(plan) - 1 {
- lastMentionedArr = append(lastMentionedArr, [2]int{val, key})
- }
- }
- if len(lastMentionedArr) == 0 && len(branchesToDelete) == 0 {
- // early return - we have nothing to optimize
- return plan
- }
- sort.Slice(lastMentionedArr, func(i, j int) bool {
- return lastMentionedArr[i][0] < lastMentionedArr[j][0]
- })
- lastMentionedArr = append(lastMentionedArr, [2]int{len(plan)-1, -1})
- prevpi := -1
- for _, pair := range lastMentionedArr {
- for pi := prevpi + 1; pi <= pair[0]; pi++ {
- p := plan[pi]
- switch p.Action {
- case runActionCommit:
- if !branchesToDelete[p.Items[0]] {
- optimizedPlan = append(optimizedPlan, p)
- }
- case runActionFork:
- var newBranches []int
- for _, b := range p.Items {
- if !branchesToDelete[b] {
- newBranches = append(newBranches, b)
- }
- }
- if len(newBranches) > 1 {
- optimizedPlan = append(optimizedPlan, runAction{
- Action: runActionFork,
- Commit: p.Commit,
- Items: newBranches,
- })
- }
- case runActionMerge:
- var newBranches []int
- for _, b := range p.Items {
- if !branchesToDelete[b] {
- newBranches = append(newBranches, b)
- }
- }
- if len(newBranches) > 1 {
- optimizedPlan = append(optimizedPlan, runAction{
- Action: runActionMerge,
- Commit: p.Commit,
- Items: newBranches,
- })
- }
- }
- }
- if pair[1] >= 0 {
- prevpi = pair[0]
- optimizedPlan = append(optimizedPlan, runAction{
- Action: runActionDelete,
- Commit: nil,
- Items: []int{pair[1]},
- })
- }
- }
- // single commit can be detected as redundant
- if len(optimizedPlan) > 0 {
- return optimizedPlan
- }
- return plan
- // TODO(vmarkovtsev): there can be also duplicate redundant merges, e.g.
- /*
- 0 4e34f03d829fbacb71cde0e010de87ea945dc69a [3]
- 0 4e34f03d829fbacb71cde0e010de87ea945dc69a [12]
- 2 [3 12]
- 0 06716c2b39422938b77ddafa4d5c39bb9e4476da [3]
- 0 06716c2b39422938b77ddafa4d5c39bb9e4476da [12]
- 2 [3 12]
- 0 1219c7bf9e0e1a93459a052ab8b351bfc379dc19 [12]
- */
- }