فهرست منبع

Improve the fast-forward optimizer

Signed-off-by: Vadim Markovtsev <vadim@sourced.tech>
Vadim Markovtsev 6 سال پیش
والد
کامیت
6e2d24e150
2 فایلهای تغییر یافته به همراه 215 افزوده شده و 93 حذف شده
  1. 123 82
      internal/core/forks.go
  2. 92 11
      internal/core/pipeline_test.go

+ 123 - 82
internal/core/forks.go

@@ -131,9 +131,18 @@ func prepareRunPlan(commits []*object.Commit) []runAction {
 	hashes, dag := buildDag(commits)
 	leaveRootComponent(hashes, dag)
 	numParents := bindNumParents(hashes, dag)
-	mergedDag, mergedSeq := mergeDag(numParents, hashes, dag)
+	mergedDag, mergedSeq := mergeDag(hashes, dag)
 	orderNodes := bindOrderNodes(mergedDag)
-	collapseFastForwards(orderNodes, numParents, hashes, mergedDag, dag, mergedSeq)
+	collapseFastForwards(orderNodes, hashes, mergedDag, dag, mergedSeq)
+	/*fmt.Printf("digraph Hercules {\n")
+	for i, c := range orderNodes(false, false) {
+		commit := hashes[c]
+		fmt.Printf("  \"%s\"[label=\"[%d] %s\"]\n", commit.Hash.String(), i, commit.Hash.String()[:6])
+		for _, child := range mergedDag[commit.Hash] {
+			fmt.Printf("  \"%s\" -> \"%s\"\n", commit.Hash.String(), child.Hash.String())
+		}
+	}
+	fmt.Printf("}\n")*/
 	plan := generatePlan(orderNodes, numParents, hashes, mergedDag, dag, mergedSeq)
 	plan = optimizePlan(plan)
 	/*for _, p := range plan {
@@ -299,101 +308,66 @@ func bindOrderNodes(mergedDag map[plumbing.Hash][]*object.Commit) orderer {
 
 // mergeDag turns sequences of consecutive commits into single nodes.
 func mergeDag(
-	numParents func(c *object.Commit) int,
 	hashes map[string]*object.Commit,
 	dag map[plumbing.Hash][]*object.Commit) (
 	mergedDag, mergedSeq map[plumbing.Hash][]*object.Commit) {
 
-	parentOf := func(c *object.Commit) plumbing.Hash {
-		var parent plumbing.Hash
-		for _, p := range c.ParentHashes {
-			if _, exists := hashes[p.String()]; exists {
-				if parent != plumbing.ZeroHash {
-					// more than one parent
-					return plumbing.ZeroHash
-				}
-				parent = p
-			}
+	parents := map[plumbing.Hash][]plumbing.Hash{}
+	for key, vals := range dag {
+		for _, val := range vals {
+			parents[val.Hash] = append(parents[val.Hash], key)
 		}
-		return parent
 	}
 	mergedDag = map[plumbing.Hash][]*object.Commit{}
 	mergedSeq = map[plumbing.Hash][]*object.Commit{}
 	visited := map[plumbing.Hash]bool{}
-	for ch := range dag {
-		c := hashes[ch.String()]
-		if visited[c.Hash] {
+	for head := range dag {
+		if visited[head] {
 			continue
 		}
+		c := head
 		for true {
-			parent := parentOf(c)
-			if parent == plumbing.ZeroHash || len(dag[parent]) != 1 {
+			next := parents[c]
+			if len(next) != 1 || len(dag[next[0]]) != 1 {
 				break
 			}
-			c = hashes[parent.String()]
+			c = next[0]
 		}
-		head := c
+		head = c
 		var seq []*object.Commit
-		children := dag[c.Hash]
 		for true {
-			visited[c.Hash] = true
-			seq = append(seq, c)
-			if len(children) != 1 {
+			visited[c] = true
+			seq = append(seq, hashes[c.String()])
+			if len(dag[c]) != 1 {
 				break
 			}
-			c = children[0]
-			children = dag[c.Hash]
-			if numParents(c) != 1 {
+			c = dag[c][0].Hash
+			if len(parents[c]) != 1 {
 				break
 			}
 		}
-		mergedSeq[head.Hash] = seq
-		mergedDag[head.Hash] = dag[seq[len(seq)-1].Hash]
+		mergedSeq[head] = seq
+		mergedDag[head] = dag[seq[len(seq)-1].Hash]
 	}
 	return
 }
 
 // collapseFastForwards removes the fast forward merges.
 func collapseFastForwards(
-	orderNodes orderer, numParents func(c *object.Commit) int,
-	hashes map[string]*object.Commit,
+	orderNodes orderer, hashes map[string]*object.Commit,
 	mergedDag, dag, mergedSeq map[plumbing.Hash][]*object.Commit)  {
 
-	for _, strkey := range orderNodes(true, false) {
-		key := hashes[strkey].Hash
-		vals, exists := mergedDag[key]
-		if !exists {
-			continue
-		}
-		if len(vals) != 2 {
-			continue
-		}
-		grand1 := mergedDag[vals[0].Hash]
-		grand2 := mergedDag[vals[1].Hash]
-		if len(grand2) == 1 && vals[0].Hash == grand2[0].Hash && numParents(vals[1]) == 1 {
-			mergedDag[key] = mergedDag[vals[0].Hash]
-			dag[key] = vals[1:]
-			delete(mergedDag, vals[0].Hash)
-			delete(mergedDag, vals[1].Hash)
-			mergedSeq[key] = append(mergedSeq[key], mergedSeq[vals[1].Hash]...)
-			mergedSeq[key] = append(mergedSeq[key], mergedSeq[vals[0].Hash]...)
-			delete(mergedSeq, vals[0].Hash)
-			delete(mergedSeq, vals[1].Hash)
-		}
-		// symmetric
-		if len(grand1) == 1 && vals[1].Hash == grand1[0].Hash && numParents(vals[0]) == 1 {
-			mergedDag[key] = mergedDag[vals[1].Hash]
-			dag[key] = vals[:1]
-			delete(mergedDag, vals[0].Hash)
-			delete(mergedDag, vals[1].Hash)
-			mergedSeq[key] = append(mergedSeq[key], mergedSeq[vals[0].Hash]...)
-			mergedSeq[key] = append(mergedSeq[key], mergedSeq[vals[1].Hash]...)
-			delete(mergedSeq, vals[0].Hash)
-			delete(mergedSeq, vals[1].Hash)
+	parents := map[plumbing.Hash][]plumbing.Hash{}
+	for key, vals := range mergedDag {
+		for _, val := range vals {
+			parents[val.Hash] = append(parents[val.Hash], key)
 		}
 	}
-	for _, strkey := range orderNodes(true, false) {
+	processed := map[plumbing.Hash]bool{}
+	for _, strkey := range orderNodes(false, true) {
 		key := hashes[strkey].Hash
+		processed[key] = true
+		repeat:
 		vals, exists := mergedDag[key]
 		if !exists {
 			continue
@@ -401,24 +375,63 @@ func collapseFastForwards(
 		if len(vals) < 2 {
 			continue
 		}
+		/*println()
+		println()
+		var logvals []string
+		for _, v := range vals {
+			logvals = append(logvals, v.Hash.String())
+		}
+		fmt.Println("candidate", key.String(), logvals)*/
 		toRemove := map[plumbing.Hash]bool{}
-		for x, child := range vals {
-			grands := mergedDag[child.Hash]
-			if len(grands) != 1 {
-				continue
+		for _, child := range vals {
+			var queue []plumbing.Hash
+			visited := map[plumbing.Hash]bool{child.Hash: true}
+			childParents := parents[child.Hash]
+			childNumOtherParents := 0
+			for _, parent := range childParents {
+				if parent != key {
+					visited[parent] = true
+					childNumOtherParents++
+					queue = append(queue, parent)
+				}
 			}
-			grand := grands[0]
-			for y, otherChild := range vals {
-				if y == x {
-					continue
+			var immediateParent plumbing.Hash
+			if childNumOtherParents == 1 {
+				immediateParent = queue[0]
+			}
+			//fmt.Println("queue", key.String(), child.Hash, queue)
+			for len(queue) > 0 {
+				head := queue[len(queue)-1]
+				queue = queue[:len(queue)-1]
+				if processed[head] {
+					//fmt.Println("processed", key.String(), head)
+					if head == key {
+						toRemove[child.Hash] = true
+						//fmt.Println("remove", key.String(), child.Hash.String(), immediateParent.String())
+						if childNumOtherParents == 1 && len(mergedDag[immediateParent]) == 1 {
+							//println("mokpyxa", key.String(), child.Hash.String(), immediateParent.String())
+							mergedSeq[immediateParent] = append(
+								mergedSeq[immediateParent], mergedSeq[child.Hash]...)
+							delete(mergedSeq, child.Hash)
+							mergedDag[immediateParent] = mergedDag[child.Hash]
+							delete(mergedDag, child.Hash)
+							parents[child.Hash] = parents[immediateParent]
+							for _, vals := range parents {
+								for i, v := range vals {
+									if v == child.Hash {
+										vals[i] = immediateParent
+										break
+									}
+								}
+							}
+						}
+					}
+					break
 				}
-				if otherChild.Hash == grand.Hash {
-					toRemove[otherChild.Hash] = true
-					if numParents(child) == 1 {
-						mergedSeq[child.Hash] = append(mergedSeq[child.Hash], mergedSeq[grand.Hash]...)
-						delete(mergedSeq, grand.Hash)
-						mergedDag[child.Hash] = mergedDag[grand.Hash]
-						delete(mergedDag, grand.Hash)
+				for _, parent := range parents[head] {
+					if !visited[parent] {
+						visited[head] = true
+						queue = append(queue, parent)
 					}
 				}
 			}
@@ -432,14 +445,42 @@ func collapseFastForwards(
 				newVals = append(newVals, child)
 			}
 		}
-		mergedDag[key] = newVals
+		merged := false
+		if len(newVals) == 1 {
+			onlyChild := newVals[0].Hash
+			if len(parents[onlyChild]) == 1 {
+				merged = true
+				mergedSeq[key] = append(mergedSeq[key], mergedSeq[onlyChild]...)
+				delete(mergedSeq, onlyChild)
+				mergedDag[key] = mergedDag[onlyChild]
+				delete(mergedDag, onlyChild)
+				parents[onlyChild] = parents[key]
+				for _, vals := range parents {
+					for i, v := range vals {
+						if v == onlyChild {
+							vals[i] = key
+							break
+						}
+					}
+				}
+				//fmt.Println("merge", key.String(), onlyChild.String())
+			}
+		}
+		if !merged {
+			//fmt.Println("prune", key.String(), newVals)
+			mergedDag[key] = newVals
+		}
 		newVals = []*object.Commit{}
-		for _, child := range dag[key] {
+		node := mergedSeq[key][len(mergedSeq[key])-1].Hash
+		for _, child := range dag[node] {
 			if !toRemove[child.Hash] {
 				newVals = append(newVals, child)
 			}
 		}
-		dag[key] = newVals
+		dag[node] = newVals
+		if merged {
+			goto repeat
+		}
 	}
 }
 

+ 92 - 11
internal/core/pipeline_test.go

@@ -471,6 +471,70 @@ func TestPrepareRunPlanSmall(t *testing.T) {
 	assert.Equal(t, "a28e9064c70618dc9d68e1401b889975e0680d11", plan[9].Commit.Hash.String())
 }
 
+func TestMergeDag(t *testing.T) {
+	cit, err := test.Repository.Log(&git.LogOptions{From: plumbing.ZeroHash})
+	if err != nil {
+		panic(err)
+	}
+	defer cit.Close()
+	var commits []*object.Commit
+	timeCutoff := time.Date(2017, 8, 12, 0, 0, 0, 0, time.FixedZone("CET", 7200))
+	cit.ForEach(func(commit *object.Commit) error {
+		reliableTime := time.Date(commit.Author.When.Year(), commit.Author.When.Month(),
+			commit.Author.When.Day(), commit.Author.When.Hour(), commit.Author.When.Minute(),
+			commit.Author.When.Second(), 0, time.FixedZone("CET", 7200))
+		if reliableTime.Before(timeCutoff) {
+			commits = append(commits, commit)
+		}
+		return nil
+	})
+	hashes, dag := buildDag(commits)
+	leaveRootComponent(hashes, dag)
+	mergedDag, _ := mergeDag(hashes, dag)
+	for key, vals := range mergedDag {
+		if key != plumbing.NewHash("a28e9064c70618dc9d68e1401b889975e0680d11") &&
+			key != plumbing.NewHash("db325a212d0bc99b470e000641d814745024bbd5") {
+			assert.Len(t, vals, len(dag[key]), key.String())
+		} else {
+			mvals := map[string]bool{}
+			for _, val := range vals {
+				mvals[val.Hash.String()] = true
+			}
+			if key == plumbing.NewHash("a28e9064c70618dc9d68e1401b889975e0680d11") {
+				assert.Contains(t, mvals, "db325a212d0bc99b470e000641d814745024bbd5")
+				assert.Contains(t, mvals, "be9b61e09b08b98e64ed461a4004c9e2412f78ee")
+			}
+			if key == plumbing.NewHash("db325a212d0bc99b470e000641d814745024bbd5") {
+				assert.Contains(t, mvals, "f30daba81ff2bf0b3ba02a1e1441e74f8a4f6fee")
+				assert.Contains(t, mvals, "8a03b5620b1caa72ec9cb847ea88332621e2950a")
+			}
+		}
+	}
+	assert.Len(t, mergedDag, 8)
+	assert.Contains(t, mergedDag, plumbing.NewHash("cce947b98a050c6d356bc6ba95030254914027b1"))
+	assert.Contains(t, mergedDag, plumbing.NewHash("a3ee37f91f0d705ec9c41ae88426f0ae44b2fbc3"))
+	assert.Contains(t, mergedDag, plumbing.NewHash("a28e9064c70618dc9d68e1401b889975e0680d11"))
+	assert.Contains(t, mergedDag, plumbing.NewHash("be9b61e09b08b98e64ed461a4004c9e2412f78ee"))
+	assert.Contains(t, mergedDag, plumbing.NewHash("db325a212d0bc99b470e000641d814745024bbd5"))
+	assert.Contains(t, mergedDag, plumbing.NewHash("f30daba81ff2bf0b3ba02a1e1441e74f8a4f6fee"))
+	assert.Contains(t, mergedDag, plumbing.NewHash("8a03b5620b1caa72ec9cb847ea88332621e2950a"))
+	assert.Contains(t, mergedDag, plumbing.NewHash("dd9dd084d5851d7dc4399fc7dbf3d8292831ebc5"))
+	queue := []plumbing.Hash{plumbing.NewHash("cce947b98a050c6d356bc6ba95030254914027b1")}
+	visited := map[plumbing.Hash]bool{}
+	for len(queue) > 0 {
+		head := queue[len(queue)-1]
+		queue = queue[:len(queue)-1]
+		if visited[head] {
+			continue
+		}
+		visited[head] = true
+		for _, child := range mergedDag[head] {
+			queue = append(queue, child.Hash)
+		}
+	}
+	assert.Len(t, visited, 8)
+}
+
 func TestPrepareRunPlanBig(t *testing.T) {
 	cases := [][7]int {
 		{2017, 8, 9, 0, 0, 0, 0},
@@ -482,15 +546,15 @@ func TestPrepareRunPlanBig(t *testing.T) {
 		{2017, 12, 9, 1, 1, 1, 1},
 		{2017, 12, 10, 1, 1, 1, 1},
 		{2017, 12, 11, 2, 2, 2, 2},
-		{2017, 12, 19, 4, 4, 4, 4},
-		{2017, 12, 27, 4, 4, 4, 4},
-		{2018, 1, 10, 4, 4, 4, 4},
-		{2018, 1, 16, 4, 4, 4, 4},
-		{2018, 1, 18, 5, 6, 5, 5},
-		{2018, 1, 23, 6, 6, 6, 6},
-		{2018, 3, 12, 7, 7, 7, 7},
-		{2018, 5, 13, 7, 7, 7, 7},
-		{2018, 5, 16, 10, 9, 10, 9},
+		{2017, 12, 19, 3, 3, 3, 3},
+		{2017, 12, 27, 3, 3, 3, 3},
+		{2018, 1, 10, 3, 3, 3, 3},
+		{2018, 1, 16, 3, 3, 3, 3},
+		{2018, 1, 18, 4, 5, 4, 4},
+		{2018, 1, 23, 5, 5, 5, 5},
+		{2018, 3, 12, 6, 6, 6, 6},
+		{2018, 5, 13, 6, 6, 6, 6},
+		{2018, 5, 16, 7, 7, 7, 7},
 	}
 	for _, testCase := range cases {
 		func() {
@@ -523,11 +587,16 @@ func TestPrepareRunPlanBig(t *testing.T) {
 			numForks := 0
 			numMerges := 0
 			numDeletes := 0
-			processed := map[plumbing.Hash]bool{}
+			processed := map[plumbing.Hash]map[int]int{}
 			for _, p := range plan {
 				switch p.Action {
 				case runActionCommit:
-					processed[p.Commit.Hash] = true
+					branches := processed[p.Commit.Hash]
+					if branches == nil {
+						branches = map[int]int{}
+						processed[p.Commit.Hash] = branches
+					}
+					branches[p.Items[0]]++
 					for _, parent := range p.Commit.ParentHashes {
 						assert.Contains(t, processed, parent)
 					}
@@ -535,11 +604,23 @@ func TestPrepareRunPlanBig(t *testing.T) {
 				case runActionFork:
 					numForks++
 				case runActionMerge:
+					counts := map[int]int{}
+					for _, i := range p.Items {
+						counts[i]++
+					}
+					for x, v := range counts {
+						assert.Equal(t, 1, v, x)
+					}
 					numMerges++
 				case runActionDelete:
 					numDeletes++
 				}
 			}
+			for c, branches := range processed {
+				for b, v := range branches {
+					assert.Equal(t, 1, v, fmt.Sprint(c.String(), b))
+				}
+			}
 			assert.Equal(t, numCommits, len(commits)+testCase[3], fmt.Sprintf("commits %v", testCase))
 			assert.Equal(t, numForks, testCase[4], fmt.Sprintf("forks %v", testCase))
 			assert.Equal(t, numMerges, testCase[5], fmt.Sprintf("merges %v", testCase))