Преглед изворни кода

Merge pull request #119 from src-d/akutta

 Support for SSH git access + Filter Whitespace changes + Filter to specific regex matching of files
Vadim Markovtsev пре 6 година
родитељ
комит
d9a9923102

+ 25 - 3
cmd/hercules/root.go

@@ -11,6 +11,7 @@ import (
 	"os"
 	"path/filepath"
 	"plugin"
+	"regexp"
 	"runtime/pprof"
 	"strings"
 	_ "unsafe" // for go:linkname
@@ -31,6 +32,8 @@ import (
 	"gopkg.in/src-d/go-git.v4/storage/memory"
 	"gopkg.in/src-d/hercules.v5"
 	"gopkg.in/src-d/hercules.v5/internal/pb"
+	"gopkg.in/src-d/go-git.v4/plumbing/transport/ssh"
+	"github.com/mitchellh/go-homedir"
 )
 
 // oneLineWriter splits the output data by lines and outputs one on top of another using '\r'.
@@ -52,11 +55,19 @@ func (writer oneLineWriter) Write(p []byte) (n int, err error) {
 	return
 }
 
-func loadRepository(uri string, cachePath string, disableStatus bool) *git.Repository {
+// loadSSHIdentity builds public key SSH credentials from the private key file at sshIdentity.
+// The path is passed through homedir.Expand first, so values such as "~/.ssh/id_rsa" are
+// accepted. The SSH user name is fixed to "git" and the key is loaded with an empty password.
+// On failure the returned credentials are nil and the error says which step went wrong.
+func loadSSHIdentity(sshIdentity string) (*ssh.PublicKeys, error) {
+	keyPath, err := homedir.Expand(sshIdentity)
+	if err != nil {
+		return nil, fmt.Errorf("expanding SSH identity path %q: %v", sshIdentity, err)
+	}
+	auth, err := ssh.NewPublicKeysFromFile("git", keyPath, "")
+	if err != nil {
+		return nil, fmt.Errorf("loading SSH identity %q: %v", keyPath, err)
+	}
+	return auth, nil
+}
+
+func loadRepository(uri string, cachePath string, disableStatus bool, sshIdentity string) *git.Repository {
 	var repository *git.Repository
 	var backend storage.Storer
 	var err error
-	if strings.Contains(uri, "://") {
+	if strings.Contains(uri, "://") || regexp.MustCompile("^[A-Za-z]\\w*@[A-Za-z0-9][\\w.]*:").MatchString(uri) {
 		if cachePath != "" {
 			backend, err = filesystem.NewStorage(osfs.New(cachePath))
 			if err != nil {
@@ -75,6 +86,15 @@ func loadRepository(uri string, cachePath string, disableStatus bool) *git.Repos
 			fmt.Fprint(os.Stderr, "connecting...\r")
 			cloneOptions.Progress = oneLineWriter{Writer: os.Stderr}
 		}
+
+		if sshIdentity != "" {
+			auth, err := loadSSHIdentity(sshIdentity);
+			if err != nil {
+				log.Printf("Failed loading SSH Identity %s\n", err)
+			}
+			cloneOptions.Auth = auth
+		}
+
 		repository, err = git.Clone(backend, nil, cloneOptions)
 		if !disableStatus {
 			fmt.Fprint(os.Stderr, strings.Repeat(" ", 80)+"\r")
@@ -157,6 +177,7 @@ targets can be added using the --plugin system.`,
 		protobuf, _ := flags.GetBool("pb")
 		profile, _ := flags.GetBool("profile")
 		disableStatus, _ := flags.GetBool("quiet")
+		sshIdentity, _ := flags.GetString("ssh-identity")
 
 		if profile {
 			go http.ListenAndServe("localhost:6060", nil)
@@ -169,7 +190,7 @@ targets can be added using the --plugin system.`,
 		if len(args) == 2 {
 			cachePath = args[1]
 		}
-		repository := loadRepository(uri, cachePath, disableStatus)
+		repository := loadRepository(uri, cachePath, disableStatus, sshIdentity)
 
 		// core logic
 		pipeline := hercules.NewPipeline(repository)
@@ -419,6 +440,7 @@ func init() {
 	rootFlags.Bool("quiet", !terminal.IsTerminal(int(os.Stdin.Fd())),
 		"Do not print status updates to stderr.")
 	rootFlags.Bool("profile", false, "Collect the profile to hercules.pprof.")
+	rootFlags.String("ssh-identity", "", "Path to SSH identity file (e.g., ~/.ssh/id_rsa) to clone from an SSH remote.")
 	cmdlineFacts, cmdlineDeployed = hercules.Registry.AddFlags(rootFlags)
 	rootCmd.SetUsageFunc(formatUsage)
 	rootCmd.AddCommand(versionCmd)

+ 7 - 6
cmd/hercules/root_test.go

@@ -15,7 +15,7 @@ import (
 )
 
 func TestLoadRepository(t *testing.T) {
-	repo := loadRepository("https://github.com/src-d/hercules", "", true)
+	repo := loadRepository("https://github.com/src-d/hercules", "", true, "")
 	assert.NotNil(t, repo)
 	log.Println("TestLoadRepository: 1/3")
 
@@ -37,7 +37,8 @@ func TestLoadRepository(t *testing.T) {
 		assert.FailNow(t, "filesystem.NewStorage")
 	}
 
-	repo = loadRepository(tempdir, "", true)
+
+	repo = loadRepository(tempdir, "", true, "")
 	assert.NotNil(t, repo)
 	log.Println("TestLoadRepository: 2/3")
 
@@ -45,12 +46,12 @@ func TestLoadRepository(t *testing.T) {
 	if runtime.GOOS != "windows" {
 		// TODO(vmarkovtsev): uncomment once https://github.com/src-d/go-billy-siva/issues/29 is resolved
 		sivafile := filepath.Join(filepath.Dir(filename), "test_data", "hercules.siva")
-		repo = loadRepository(sivafile, "", true)
+		repo = loadRepository(sivafile, "", true, "")
 		assert.NotNil(t, repo)
 		log.Println("TestLoadRepository: 3/3")
 	}
 
-	assert.Panics(t, func() { loadRepository("https://github.com/src-d/porn", "", true) })
-	assert.Panics(t, func() { loadRepository(filepath.Dir(filename), "", true) })
-	assert.Panics(t, func() { loadRepository("/xxx", "", true) })
+	assert.Panics(t, func() { loadRepository("https://github.com/src-d/porn", "", true, "") })
+	assert.Panics(t, func() { loadRepository(filepath.Dir(filename), "", true, "") })
+	assert.Panics(t, func() { loadRepository("/xxx", "", true, "") })
 }

+ 33 - 7
internal/plumbing/diff.go

@@ -1,6 +1,7 @@
 package plumbing
 
 import (
+	"strings"
 	"github.com/sergi/go-diff/diffmatchpatch"
 	"gopkg.in/src-d/go-git.v4"
 	"gopkg.in/src-d/go-git.v4/plumbing"
@@ -14,6 +15,7 @@ import (
 type FileDiff struct {
 	core.NoopMerger
 	CleanupDisabled bool
+	WhitespaceIgnore bool
 }
 
 const (
@@ -24,6 +26,10 @@ const (
 
 	// DependencyFileDiff is the name of the dependency provided by FileDiff.
 	DependencyFileDiff = "file_diff"
+
+	// ConfigFileWhitespaceIgnore is the name of the configuration option (FileDiff.Configure())
+	// which makes the diff ignore whitespace so that formatting-only changes do not pollute it.
+	ConfigFileWhitespaceIgnore = "FileDiff.WhitespaceIgnore"
 )
 
 // FileDiffData is the type of the dependency provided by FileDiff.
@@ -56,13 +62,21 @@ func (diff *FileDiff) Requires() []string {
 
 // ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
 func (diff *FileDiff) ListConfigurationOptions() []core.ConfigurationOption {
-	options := [...]core.ConfigurationOption{{
-		Name:        ConfigFileDiffDisableCleanup,
-		Description: "Do not apply additional heuristics to improve diffs.",
-		Flag:        "no-diff-cleanup",
-		Type:        core.BoolConfigurationOption,
-		Default:     false},
+	options := [...]core.ConfigurationOption{
+		{
+			Name:        ConfigFileDiffDisableCleanup,
+			Description: "Do not apply additional heuristics to improve diffs.",
+			Flag:        "no-diff-cleanup",
+			Type:        core.BoolConfigurationOption,
+			Default:     false},
+		{
+			Name:        ConfigFileWhitespaceIgnore,
+			Description: "Ignore whitespace when computing diffs.",
+			Flag:        "no-diff-whitespace",
+			Type:        core.BoolConfigurationOption,
+			Default:     false},
 	}
+
 	return options[:]
 }
 
@@ -71,12 +85,23 @@ func (diff *FileDiff) Configure(facts map[string]interface{}) {
 	if val, exists := facts[ConfigFileDiffDisableCleanup].(bool); exists {
 		diff.CleanupDisabled = val
 	}
+	if val, exists := facts[ConfigFileWhitespaceIgnore].(bool); exists {
+		diff.WhitespaceIgnore = val
+	}
 }
 
 // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
 // calls. The repository which is going to be analysed is supplied as an argument.
 func (diff *FileDiff) Initialize(repository *git.Repository) {}
 
+func stripWhitespace(str string, ignoreWhitespace bool) string {
+	if ignoreWhitespace {
+		response := strings.Replace(str, " ", "", -1)
+		return response
+	}
+	return str
+}
+
 // Consume runs this PipelineItem on the next commit data.
 // `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
 // Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
@@ -99,8 +124,9 @@ func (diff *FileDiff) Consume(deps map[string]interface{}) (map[string]interface
 			// git/git 4f7770c87ce3c302e1639a7737a6d2531fe4b160 fetch-pack.c is invalid UTF-8
 			strFrom, strTo := string(blobFrom.Data), string(blobTo.Data)
 			dmp := diffmatchpatch.New()
-			src, dst, _ := dmp.DiffLinesToRunes(strFrom, strTo)
+			src, dst, _ := dmp.DiffLinesToRunes(stripWhitespace(strFrom, diff.WhitespaceIgnore), stripWhitespace(strTo, diff.WhitespaceIgnore))
 			diffs := dmp.DiffMainRunes(src, dst, false)
+
 			if !diff.CleanupDisabled {
 				diffs = dmp.DiffCleanupMerge(dmp.DiffCleanupSemanticLossless(diffs))
 			}

+ 47 - 2
internal/plumbing/diff_test.go

@@ -24,12 +24,15 @@ func TestFileDiffMeta(t *testing.T) {
 	assert.Equal(t, len(fd.Requires()), 2)
 	assert.Equal(t, fd.Requires()[0], items.DependencyTreeChanges)
 	assert.Equal(t, fd.Requires()[1], items.DependencyBlobCache)
-	assert.Len(t, fd.ListConfigurationOptions(), 1)
+	assert.Len(t, fd.ListConfigurationOptions(), 2)
 	assert.Equal(t, fd.ListConfigurationOptions()[0].Name, items.ConfigFileDiffDisableCleanup)
+	assert.Equal(t, fd.ListConfigurationOptions()[1].Name, items.ConfigFileWhitespaceIgnore)
 	facts := map[string]interface{}{}
 	facts[items.ConfigFileDiffDisableCleanup] = true
+	facts[items.ConfigFileWhitespaceIgnore] = true
 	fd.Configure(facts)
 	assert.True(t, fd.CleanupDisabled)
+	assert.True(t, fd.WhitespaceIgnore)
 }
 
 func TestFileDiffRegistration(t *testing.T) {
@@ -272,6 +275,48 @@ func TestFileDiffDarkMagic(t *testing.T) {
 	assert.Equal(t, magicDiffs.NewLinesOfCode, plainDiffs.NewLinesOfCode)
 }
 
+func TestFileDiffWhitespaceDarkMagic(t *testing.T) {
+	fd := fixtures.FileDiff()
+	deps := map[string]interface{}{}
+	cache := map[plumbing.Hash]*items.CachedBlob{}
+	items.AddHash(t, cache, "448eb3f312849b0ca766063d06b09481c987b309") // 1.java
+	items.AddHash(t, cache, "3312c92f3e8bdfbbdb30bccb6acd1b85bc338dfc") // 2.java
+	deps[items.DependencyBlobCache] = cache
+	changes := make(object.Changes, 1)
+	treeFrom, _ := test.Repository.TreeObject(plumbing.NewHash(
+		"f02289bfe843388a1bb3c7dea210374082dd86b9"))
+	treeTo, _ := test.Repository.TreeObject(plumbing.NewHash(
+		"eca91acf1fd828f20dcb653a061d8c97d965bc6c"))
+	changes[0] = &object.Change{From: object.ChangeEntry{
+		Name: "test.java",
+		Tree: treeFrom,
+		TreeEntry: object.TreeEntry{
+			Name: "test.java",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("448eb3f312849b0ca766063d06b09481c987b309"),
+		},
+	}, To: object.ChangeEntry{
+		Name: "test.java",
+		Tree: treeTo,
+		TreeEntry: object.TreeEntry{
+			Name: "test.java",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("3312c92f3e8bdfbbdb30bccb6acd1b85bc338dfc"),
+		},
+	}}
+	deps[items.DependencyTreeChanges] = changes
+	res, err := fd.Consume(deps)
+	assert.Nil(t, err)
+	magicDiffs := res[items.DependencyFileDiff].(map[string]items.FileDiffData)["test.java"]
+	fd.WhitespaceIgnore = true
+	res, err = fd.Consume(deps)
+	assert.Nil(t, err)
+	plainDiffs := res[items.DependencyFileDiff].(map[string]items.FileDiffData)["test.java"]
+	assert.NotEqual(t, magicDiffs.Diffs, plainDiffs.Diffs)
+	assert.Equal(t, magicDiffs.OldLinesOfCode, plainDiffs.OldLinesOfCode)
+	assert.Equal(t, magicDiffs.NewLinesOfCode, plainDiffs.NewLinesOfCode)
+}
+
 func TestFileDiffFork(t *testing.T) {
 	fd1 := fixtures.FileDiff()
 	clones := fd1.Fork(1)
@@ -279,4 +324,4 @@ func TestFileDiffFork(t *testing.T) {
 	fd2 := clones[0].(*items.FileDiff)
 	assert.True(t, fd1 == fd2)
 	fd1.Merge([]core.PipelineItem{fd2})
-}
+}

+ 42 - 16
internal/plumbing/tree_diff.go

@@ -5,12 +5,13 @@ import (
 	"gopkg.in/src-d/enry.v1"
 	"io"
 	"log"
+	"regexp"
 	"strings"
 
 	"gopkg.in/src-d/go-git.v4"
+	"gopkg.in/src-d/go-git.v4/plumbing"
 	"gopkg.in/src-d/go-git.v4/plumbing/object"
 	"gopkg.in/src-d/hercules.v5/internal/core"
-	"gopkg.in/src-d/go-git.v4/plumbing"
 )
 
 // TreeDiff generates the list of changes for a commit. A change can be either one or two blobs
@@ -19,12 +20,13 @@ import (
 // TreeDiff is a PipelineItem.
 type TreeDiff struct {
 	core.NoopMerger
-	SkipDirs     []string
-	Languages    map[string]bool
+	SkipDirs   []string
+	NameFilter *regexp.Regexp
+	Languages  map[string]bool
 
-	previousTree *object.Tree
+	previousTree   *object.Tree
 	previousCommit plumbing.Hash
-	repository *git.Repository
+	repository     *git.Repository
 }
 
 const (
@@ -44,6 +46,10 @@ const (
 	ConfigTreeDiffLanguages = "TreeDiff.Languages"
 	// allLanguages denotes passing all files in.
 	allLanguages = "all"
+
+	// ConfigTreeDiffFilterRegex is the name of the configuration option (TreeDiff.Configure())
+	// which keeps only those changes whose old or new file name matches this regexp.
+	ConfigTreeDiffFilterRegex = "TreeDiff.FilteredRegexes"
 )
 
 // defaultBlacklistedPrefixes is the list of file path prefixes which should be skipped by default.
@@ -82,21 +88,29 @@ func (treediff *TreeDiff) ListConfigurationOptions() []core.ConfigurationOption
 		Flag:        "skip-blacklist",
 		Type:        core.BoolConfigurationOption,
 		Default:     false}, {
-		Name:        ConfigTreeDiffBlacklistedPrefixes,
+
+		Name: ConfigTreeDiffBlacklistedPrefixes,
 		Description: "List of blacklisted path prefixes (e.g. directories or specific files). " +
 			"Values are in the UNIX format (\"path/to/x\"). Values should *not* start with \"/\". " +
 			"Separated with commas \",\".",
-		Flag:        "blacklisted-prefixes",
-		Type:        core.StringsConfigurationOption,
-		Default:     defaultBlacklistedPrefixes}, {
-		Name:        ConfigTreeDiffLanguages,
+		Flag:    "blacklisted-prefixes",
+		Type:    core.StringsConfigurationOption,
+		Default: defaultBlacklistedPrefixes}, {
+
+		Name: ConfigTreeDiffLanguages,
 		Description: fmt.Sprintf(
-			"List of programming languages to analyze. Separated by comma \",\". " +
-			"Names are at https://doc.bblf.sh/languages.html \"%s\" is the special name " +
-			"which disables this filter and lets all the files through.", allLanguages),
-		Flag:        "languages",
-		Type:        core.StringsConfigurationOption,
-		Default:     []string{allLanguages}},
+			"List of programming languages to analyze. Separated by comma \",\". "+
+				"Names are at https://doc.bblf.sh/languages.html \"%s\" is the special name "+
+				"which disables this filter and lets all the files through.", allLanguages),
+		Flag:    "languages",
+		Type:    core.StringsConfigurationOption,
+		Default: []string{allLanguages}}, {
+
+		Name:        ConfigTreeDiffFilterRegex,
+		Description: "Whitelist Regex to determine which files to analyze",
+		Flag:        "whitelist",
+		Type:        core.StringConfigurationOption,
+		Default:     ""},
 	}
 	return options[:]
 }
@@ -115,6 +129,10 @@ func (treediff *TreeDiff) Configure(facts map[string]interface{}) {
 		treediff.Languages = map[string]bool{}
 		treediff.Languages[allLanguages] = true
 	}
+
+	if val, exists := facts[ConfigTreeDiffFilterRegex].(string); exists {
+		treediff.NameFilter = regexp.MustCompile(val)
+	}
 }
 
 // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
@@ -196,6 +214,14 @@ OUTER:
 				continue OUTER
 			}
 		}
+		if treediff.NameFilter != nil {
+			matchedTo := treediff.NameFilter.MatchString(change.To.Name)
+			matchedFrom := treediff.NameFilter.MatchString(change.From.Name)
+
+			if !matchedTo && !matchedFrom {
+				continue OUTER
+			}
+		}
 		var changeEntry object.ChangeEntry
 		if change.To.Tree == nil {
 			changeEntry = change.From

+ 32 - 2
internal/plumbing/tree_diff_test.go

@@ -25,7 +25,7 @@ func TestTreeDiffMeta(t *testing.T) {
 	assert.Equal(t, len(td.Provides()), 1)
 	assert.Equal(t, td.Provides()[0], DependencyTreeChanges)
 	opts := td.ListConfigurationOptions()
-	assert.Len(t, opts, 3)
+	assert.Len(t, opts, 4)
 }
 
 func TestTreeDiffRegistration(t *testing.T) {
@@ -143,6 +143,36 @@ func TestTreeDiffConsumeSkip(t *testing.T) {
 	assert.Equal(t, 31, len(changes))
 }
 
+// TestTreeDiffConsumeOnlyFilesThatMatchFilter consumes the same commit pair twice, without and
+// with ConfigTreeDiffFilterRegex, and checks that the name filter reduces the number of
+// reported changes.
+func TestTreeDiffConsumeOnlyFilesThatMatchFilter(t *testing.T) {
+	td := fixtureTreeDiff()
+	assert.Contains(t, td.Languages, allLanguages)
+	headCommit, _ := test.Repository.CommitObject(plumbing.NewHash(
+		"aefdedf7cafa6ee110bae9a3910bf5088fdeb5a9"))
+	deps := map[string]interface{}{core.DependencyCommit: headCommit}
+	baseCommit, _ := test.Repository.CommitObject(plumbing.NewHash(
+		"1e076dc56989bc6aa1ef5f55901696e9e01423d4"))
+
+	// consumeChanges runs the current td on deps and returns the changes it provides.
+	consumeChanges := func() object.Changes {
+		res, err := td.Consume(deps)
+		assert.Nil(t, err)
+		assert.Equal(t, len(res), 1)
+		return res[DependencyTreeChanges].(object.Changes)
+	}
+
+	// Baseline: no name filter is configured.
+	td.previousTree, _ = baseCommit.Tree()
+	assert.Equal(t, 37, len(consumeChanges()))
+
+	// Filtered: only changes whose file name matches the regexp remain.
+	td = fixtureTreeDiff()
+	td.previousTree, _ = baseCommit.Tree()
+	td.Configure(map[string]interface{}{
+		ConfigTreeDiffFilterRegex: ".*go",
+	})
+	assert.Equal(t, 27, len(consumeChanges()))
+}
+
 func TestTreeDiffConsumeLanguageFilterFirst(t *testing.T) {
 	td := fixtureTreeDiff()
 	td.Configure(map[string]interface{}{ConfigTreeDiffLanguages: "Go"})
@@ -194,4 +224,4 @@ func TestTreeDiffFork(t *testing.T) {
 	assert.Equal(t, td1.SkipDirs, td2.SkipDirs)
 	assert.Equal(t, td1.previousTree, td2.previousTree)
 	td1.Merge([]core.PipelineItem{td2})
-}
+}