Browse Source

Improve tree modification debugging

Vadim Markovtsev 8 years ago
parent
commit
40ff67e787
3 changed files with 39 additions and 4 deletions
  1. 19 4
      analyser.go
  2. 3 0
      cmd/hercules/main.go
  3. 17 0
      file.go

+ 19 - 4
analyser.go

@@ -23,6 +23,7 @@ type Analyser struct {
 	Granularity         int
 	Sampling            int
 	SimilarityThreshold int
+	Debug               bool
 	OnProgress          func(int, int)
 }
 
@@ -129,6 +130,14 @@ func (analyser *Analyser) handleModification(
 		} else {
 			file.Update(day, position, 0, length)
 		}
+		if analyser.Debug {
+			file.Validate()
+		}
+	}
+
+	dump_before := ""
+	if analyser.Debug {
+		dump_before = file.Dump()
 	}
 
 	for _, edit := range diffs {
@@ -138,9 +147,12 @@ func (analyser *Analyser) handleModification(
 				r := recover()
 				if r != nil {
 					fmt.Fprintf(os.Stderr, "%s: internal diff error\n", change.To.Name)
-					fmt.Fprintf(os.Stderr, "Update(%d, %d, %d, %d)\n", position, length,
-						          day, utf8.RuneCountInString(pending.Text))
-					fmt.Fprintf(os.Stderr, "====TREE====\n%s====END====\n", file.Dump())
+					fmt.Fprintf(os.Stderr, "Update(%d, %d, %d, %d)\n", day, position,
+						length, utf8.RuneCountInString(pending.Text))
+					if dump_before != "" {
+						fmt.Fprintf(os.Stderr, "====TREE BEFORE====\n%s====END====\n", dump_before)
+					}
+					fmt.Fprintf(os.Stderr, "====TREE AFTER====\n%s====END====\n", file.Dump())
 					panic(r)
 				}
 			}()
@@ -157,6 +169,9 @@ func (analyser *Analyser) handleModification(
 						panic("DiffInsert may not appear after DiffInsert")
 					}
 					file.Update(day, position, length, utf8.RuneCountInString(pending.Text))
+					if analyser.Debug {
+						file.Validate()
+					}
 					position += length
 					pending.Text = ""
 				} else {
@@ -534,7 +549,7 @@ func (analyser *Analyser) Analyse(commits []*object.Commit) [][]int64 {
 							r := recover()
 							if r != nil {
 								fmt.Fprintf(os.Stderr, "#%d - %s: modification error\n",
-									          index, commit.Hash.String())
+									index, commit.Hash.String())
 								panic(r)
 							}
 						}()

+ 3 - 0
cmd/hercules/main.go

@@ -51,11 +51,13 @@ func main() {
 	var profile bool
 	var granularity, sampling, similarity_threshold int
 	var commitsFile string
+	var debug bool
 	flag.BoolVar(&profile, "profile", false, "Collect the profile to hercules.pprof.")
 	flag.IntVar(&granularity, "granularity", 30, "Report granularity in days.")
 	flag.IntVar(&sampling, "sampling", 30, "Report sampling in days.")
 	flag.IntVar(&similarity_threshold, "M", 90,
 		"A threshold on the similarity index used to detect renames.")
+	flag.BoolVar(&debug, "debug", false, "Validate the trees on each step.")
 	flag.StringVar(&commitsFile, "commits", "", "Path to the text file with the "+
 		"commit history to follow instead of the default rev-list "+
 		"--first-parent. The format is the list of hashes, each hash on a "+
@@ -111,6 +113,7 @@ func main() {
 		Granularity:         granularity,
 		Sampling:            sampling,
 		SimilarityThreshold: similarity_threshold,
+		Debug:               debug,
 	}
 	// list of commits belonging to the default branch, from oldest to newest
 	// rev-list --first-parent

+ 17 - 0
file.go

@@ -179,3 +179,20 @@ func (file *File) Dump() string {
 	}
 	return buffer
 }
+
+// Validate checks the structural invariants of the file's tree and panics on
+// the first violation. It is a debugging aid: the analyser calls it after
+// each Update when its Debug flag is set.
+//
+// The invariants checked are:
+//   - the smallest key in the tree is 0;
+//   - the value stored in the largest node is TreeEnd;
+//   - no two consecutive nodes share the same key.
+func (file *File) Validate() {
+	if file.tree.Min().Item().key != 0 {
+		panic("the tree must start with key 0")
+	}
+	// Compare against TreeEnd rather than a literal so the check cannot drift
+	// from the value reported in the panic message.
+	if file.tree.Max().Item().value != TreeEnd {
+		panic(fmt.Sprintf("the last value in the tree must be %d", TreeEnd))
+	}
+	// -1 can never equal the first key, which was verified to be 0 above.
+	prevKey := -1
+	for iter := file.tree.Min(); !iter.Limit(); iter = iter.Next() {
+		node := iter.Item()
+		if node.key == prevKey {
+			panic(fmt.Sprintf("duplicate tree key: %d", node.key))
+		}
+		prevKey = node.key
+	}
+}