浏览代码

Add --renames-timeout

Signed-off-by: Vadim Markovtsev <vadim@athenian.co>
Vadim Markovtsev 5 年之前
父节点
当前提交
1b86739f3b
共有 3 个文件被更改,包括 45 次插入5 次删除
  1. 31 3
      internal/plumbing/renames.go
  2. 13 1
      internal/plumbing/renames_test.go
  3. 1 1
      python/setup.py

+ 31 - 3
internal/plumbing/renames.go

@@ -1,6 +1,7 @@
 package plumbing
 
 import (
+	"fmt"
 	"path/filepath"
 	"sort"
 	"strings"
@@ -28,6 +29,9 @@ type RenameAnalysis struct {
 	// set it to the default value of 80 (80%).
 	SimilarityThreshold int
 
+	// Timeout is the maximum time allowed to spend computing renames in a single commit.
+	Timeout time.Duration
+
 	repository *git.Repository
 
 	l core.Logger
@@ -39,10 +43,18 @@ const (
 	// CGit's default is 50%. Ours is 80% because 50% can be too computationally expensive.
 	RenameAnalysisDefaultThreshold = 80
 
+	// RenameAnalysisDefaultTimeout is the default value of RenameAnalysis.Timeout (in milliseconds).
+	RenameAnalysisDefaultTimeout = 60000
+
 	// ConfigRenameAnalysisSimilarityThreshold is the name of the configuration option
 	// (RenameAnalysis.Configure()) which sets the similarity threshold.
 	ConfigRenameAnalysisSimilarityThreshold = "RenameAnalysis.SimilarityThreshold"
 
+	// ConfigRenameAnalysisTimeout is the name of the configuration option
+	// (RenameAnalysis.Configure()) which sets the maximum time allowed to spend
+	// computing renames in a single commit.
+	ConfigRenameAnalysisTimeout = "RenameAnalysis.Timeout"
+
 	// RenameAnalysisMinimumSize is the minimum size of a blob to be considered.
 	RenameAnalysisMinimumSize = 32
 
@@ -84,7 +96,13 @@ func (ra *RenameAnalysis) ListConfigurationOptions() []core.ConfigurationOption
 		Description: "The threshold on the similarity index used to detect renames.",
 		Flag:        "M",
 		Type:        core.IntConfigurationOption,
-		Default:     RenameAnalysisDefaultThreshold},
+		Default:     RenameAnalysisDefaultThreshold}, {
+		Name: ConfigRenameAnalysisTimeout,
+		Description: "The maximum time (milliseconds) allowed to spend computing " +
+			"renames in a single commit. 0 sets the default.",
+		Flag:    "renames-timeout",
+		Type:    core.IntConfigurationOption,
+		Default: RenameAnalysisDefaultTimeout},
 	}
 	return options[:]
 }
@@ -97,6 +115,12 @@ func (ra *RenameAnalysis) Configure(facts map[string]interface{}) error {
 	if val, exists := facts[ConfigRenameAnalysisSimilarityThreshold].(int); exists {
 		ra.SimilarityThreshold = val
 	}
+	if val, exists := facts[ConfigRenameAnalysisTimeout].(int); exists {
+		if val < 0 {
+			return fmt.Errorf("negative renames detection timeout is not allowed: %d", val)
+		}
+		ra.Timeout = time.Duration(val) * time.Millisecond
+	}
 	return nil
 }
 
@@ -109,6 +133,9 @@ func (ra *RenameAnalysis) Initialize(repository *git.Repository) error {
 			RenameAnalysisDefaultThreshold)
 		ra.SimilarityThreshold = RenameAnalysisDefaultThreshold
 	}
+	if ra.Timeout == 0 {
+		ra.Timeout = time.Duration(RenameAnalysisDefaultTimeout) * time.Millisecond
+	}
 	ra.repository = repository
 	return nil
 }
@@ -119,6 +146,7 @@ func (ra *RenameAnalysis) Initialize(repository *git.Repository) error {
 // This function returns the mapping with analysis results. The keys must be the same as
 // in Provides(). If there was an error, nil is returned.
 func (ra *RenameAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
+	beginTime := time.Now()
 	changes := deps[DependencyTreeChanges].(object.Changes)
 	cache := deps[DependencyBlobCache].(map[plumbing.Hash]*CachedBlob)
 
@@ -225,7 +253,7 @@ func (ra *RenameAnalysis) Consume(deps map[string]interface{}) (map[string]inter
 		}()
 		aStart := 0
 		// we will try to find a matching added blob for each deleted blob
-		for d := 0; d < deletedBlobsA.Len(); d++ {
+		for d := 0; d < deletedBlobsA.Len() && time.Now().Sub(beginTime) < ra.Timeout; d++ {
 			myBlob := cache[deletedBlobsA[d].change.From.TreeEntry.Hash]
 			mySize := deletedBlobsA[d].size
 			myName := filepath.Base(deletedBlobsA[d].change.From.Name)
@@ -283,7 +311,7 @@ func (ra *RenameAnalysis) Consume(deps map[string]interface{}) (map[string]inter
 			wg.Done()
 		}()
 		dStart := 0
-		for a := 0; a < addedBlobsB.Len(); a++ {
+		for a := 0; a < addedBlobsB.Len() && time.Now().Sub(beginTime) < ra.Timeout; a++ {
 			myBlob := cache[addedBlobsB[a].change.To.TreeEntry.Hash]
 			mySize := addedBlobsB[a].size
 			myName := filepath.Base(addedBlobsB[a].change.To.Name)

+ 13 - 1
internal/plumbing/renames_test.go

@@ -8,6 +8,7 @@ import (
 	"path"
 	"sync"
 	"testing"
+	"time"
 
 	"github.com/stretchr/testify/assert"
 	"gopkg.in/src-d/go-git.v4/plumbing"
@@ -31,14 +32,17 @@ func TestRenameAnalysisMeta(t *testing.T) {
 	assert.Equal(t, ra.Requires()[0], DependencyBlobCache)
 	assert.Equal(t, ra.Requires()[1], DependencyTreeChanges)
 	opts := ra.ListConfigurationOptions()
-	assert.Len(t, opts, 1)
+	assert.Len(t, opts, 2)
 	assert.Equal(t, opts[0].Name, ConfigRenameAnalysisSimilarityThreshold)
+	assert.Equal(t, opts[1].Name, ConfigRenameAnalysisTimeout)
 	ra.SimilarityThreshold = 0
 
 	assert.NoError(t, ra.Configure(map[string]interface{}{
 		ConfigRenameAnalysisSimilarityThreshold: 70,
+		ConfigRenameAnalysisTimeout:             1000,
 	}))
 	assert.Equal(t, ra.SimilarityThreshold, 70)
+	assert.Equal(t, ra.Timeout, time.Second)
 
 	logger := core.NewLogger()
 	assert.NoError(t, ra.Configure(map[string]interface{}{
@@ -46,6 +50,7 @@ func TestRenameAnalysisMeta(t *testing.T) {
 	}))
 	assert.Equal(t, logger, ra.l)
 	assert.Equal(t, ra.SimilarityThreshold, 70)
+	assert.Equal(t, ra.Timeout, time.Second)
 }
 
 func TestRenameAnalysisRegistration(t *testing.T) {
@@ -138,6 +143,13 @@ func TestRenameAnalysisConsume(t *testing.T) {
 	assert.Nil(t, err)
 	renamed = res[DependencyTreeChanges].(object.Changes)
 	assert.Equal(t, len(renamed), 3)
+
+	ra.SimilarityThreshold = 37
+	ra.Timeout = time.Microsecond
+	res, err = ra.Consume(deps)
+	assert.Nil(t, err)
+	renamed = res[DependencyTreeChanges].(object.Changes)
+	assert.Equal(t, len(renamed), 3)
 }
 
 func TestSortableChanges(t *testing.T) {

+ 1 - 1
python/setup.py

@@ -22,7 +22,7 @@ setup(
     description="Python companion for github.com/src-d/hercules to visualize the results.",
     long_description=long_description,
     long_description_content_type="text/markdown",
-    version="10.7.1",
+    version="10.7.2",
     license="Apache-2.0",
     author="source{d}",
     author_email="machine-learning@sourced.tech",