瀏覽代碼

Add exact signature id matching

Fixes #340

Signed-off-by: Vadim Markovtsev <vadim@athenian.co>
Vadim Markovtsev 5 年之前
父節點
當前提交
464dc7f156
共有 3 個文件被更改,包括 113 次插入48 次删除
  1. 67 30
      internal/plumbing/identity/identity.go
  2. 45 17
      internal/plumbing/identity/identity_test.go
  3. 1 1
      python/setup.py

+ 67 - 30
internal/plumbing/identity/identity.go

@@ -21,6 +21,9 @@ type Detector struct {
 	PeopleDict map[string]int
 	// ReversedPeopleDict maps developer id -> description
 	ReversedPeopleDict []string
+	// ExactSignatures chooses the matching algorithm: opportunistic email || name
+	// or exact email && name
+	ExactSignatures bool
 
 	l core.Logger
 }
@@ -43,6 +46,10 @@ const (
 	// ConfigIdentityDetectorPeopleDictPath is the name of the configuration option
 	// (Detector.Configure()) which allows to set the external PeopleDict mapping from a file.
 	ConfigIdentityDetectorPeopleDictPath = "IdentityDetector.PeopleDictPath"
+	// ConfigIdentityDetectorExactSignatures is the name of the configuration option
+	// (Detector.Configure()) which changes the matching algorithm to exact signature (name + email)
+	// correspondence.
+	ConfigIdentityDetectorExactSignatures = "IdentityDetector.ExactSignatures"
 	// FactIdentityDetectorPeopleCount is the name of the fact which is inserted in
 	// Detector.Configure(). It is equal to the overall number of unique authors
 	// (the length of ReversedPeopleDict).
@@ -78,7 +85,13 @@ func (detector *Detector) ListConfigurationOptions() []core.ConfigurationOption
 		Description: "Path to the file with developer -> name|email associations.",
 		Flag:        "people-dict",
 		Type:        core.PathConfigurationOption,
-		Default:     ""},
+		Default:     ""}, {
+		Name: ConfigIdentityDetectorExactSignatures,
+		Description: "Disable separate name/email matching. This will lead to considerbly more " +
+			"identities and should not be normally used.",
+		Flag:    "exact-signatures",
+		Type:    core.BoolConfigurationOption,
+		Default: false},
 	}
 	return options[:]
 }
@@ -96,6 +109,9 @@ func (detector *Detector) Configure(facts map[string]interface{}) error {
 	if val, exists := facts[FactIdentityDetectorReversedPeopleDict].([]string); exists {
 		detector.ReversedPeopleDict = val
 	}
+	if val, exists := facts[ConfigIdentityDetectorExactSignatures].(bool); exists {
+		detector.ExactSignatures = val
+	}
 	if detector.PeopleDict == nil || detector.ReversedPeopleDict == nil {
 		peopleDictPath, _ := facts[ConfigIdentityDetectorPeopleDictPath].(string)
 		if peopleDictPath != "" {
@@ -133,13 +149,19 @@ func (detector *Detector) Initialize(repository *git.Repository) error {
 // in Provides(). If there was an error, nil is returned.
 func (detector *Detector) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
 	commit := deps[core.DependencyCommit].(*object.Commit)
+	var authorID int
+	var exists bool
 	signature := commit.Author
-	authorID, exists := detector.PeopleDict[strings.ToLower(signature.Email)]
-	if !exists {
-		authorID, exists = detector.PeopleDict[strings.ToLower(signature.Name)]
+	if !detector.ExactSignatures {
+		authorID, exists = detector.PeopleDict[strings.ToLower(signature.Email)]
 		if !exists {
-			authorID = AuthorMissing
+			authorID, exists = detector.PeopleDict[strings.ToLower(signature.Name)]
 		}
+	} else {
+		authorID, exists = detector.PeopleDict[strings.ToLower(signature.String())]
+	}
+	if !exists {
+		authorID = AuthorMissing
 	}
 	return map[string]interface{}{DependencyAuthor: authorID}, nil
 }
@@ -184,7 +206,8 @@ func (detector *Detector) GeneratePeopleDict(commits []*object.Commit) {
 	size := 0
 
 	mailmapFile, err := commits[len(commits)-1].File(".mailmap")
-	if err == nil {
+	// TODO(vmarkovtsev): properly handle .mailmap if ExactSignatures
+	if !detector.ExactSignatures && err == nil {
 		mailMapContents, err := mailmapFile.Contents()
 		if err == nil {
 			mailmap := ParseMailmap(mailMapContents)
@@ -239,34 +262,48 @@ func (detector *Detector) GeneratePeopleDict(commits []*object.Commit) {
 	}
 
 	for _, commit := range commits {
-		email := strings.ToLower(commit.Author.Email)
-		name := strings.ToLower(commit.Author.Name)
-		id, exists := dict[email]
-		if exists {
-			_, exists := dict[name]
-			if !exists {
-				dict[name] = id
-				names[id] = append(names[id], name)
+		if !detector.ExactSignatures {
+			email := strings.ToLower(commit.Author.Email)
+			name := strings.ToLower(commit.Author.Name)
+			id, exists := dict[email]
+			if exists {
+				_, exists := dict[name]
+				if !exists {
+					dict[name] = id
+					names[id] = append(names[id], name)
+				}
+				continue
+			}
+			id, exists = dict[name]
+			if exists {
+				dict[email] = id
+				emails[id] = append(emails[id], email)
+				continue
+			}
+			dict[email] = size
+			dict[name] = size
+			emails[size] = append(emails[size], email)
+			names[size] = append(names[size], name)
+			size++
+		} else { // !detector.ExactSignatures
+			sig := strings.ToLower(commit.Author.String())
+			if _, exists := dict[sig]; !exists {
+				dict[sig] = size
+				size++
 			}
-			continue
-		}
-		id, exists = dict[name]
-		if exists {
-			dict[email] = id
-			emails[id] = append(emails[id], email)
-			continue
 		}
-		dict[email] = size
-		dict[name] = size
-		emails[size] = append(emails[size], email)
-		names[size] = append(names[size], name)
-		size++
 	}
 	reverseDict := make([]string, size)
-	for _, val := range dict {
-		sort.Strings(names[val])
-		sort.Strings(emails[val])
-		reverseDict[val] = strings.Join(names[val], "|") + "|" + strings.Join(emails[val], "|")
+	if !detector.ExactSignatures {
+		for _, val := range dict {
+			sort.Strings(names[val])
+			sort.Strings(emails[val])
+			reverseDict[val] = strings.Join(names[val], "|") + "|" + strings.Join(emails[val], "|")
+		}
+	} else {
+		for key, val := range dict {
+			reverseDict[val] = key
+		}
 	}
 	detector.PeopleDict = dict
 	detector.ReversedPeopleDict = reverseDict

+ 45 - 17
internal/plumbing/identity/identity_test.go

@@ -39,8 +39,9 @@ func TestIdentityDetectorMeta(t *testing.T) {
 	assert.Equal(t, len(id.Provides()), 1)
 	assert.Equal(t, id.Provides()[0], DependencyAuthor)
 	opts := id.ListConfigurationOptions()
-	assert.Len(t, opts, 1)
+	assert.Len(t, opts, 2)
 	assert.Equal(t, opts[0].Name, ConfigIdentityDetectorPeopleDictPath)
+	assert.Equal(t, opts[1].Name, ConfigIdentityDetectorExactSignatures)
 	logger := core.NewLogger()
 	assert.NoError(t, id.Configure(map[string]interface{}{
 		core.ConfigLogger: logger,
@@ -150,6 +151,28 @@ func TestIdentityDetectorConsume(t *testing.T) {
 	assert.Equal(t, res[DependencyAuthor].(int), AuthorMissing)
 }
 
+func TestIdentityDetectorConsumeExact(t *testing.T) {
+	commit, _ := test.Repository.CommitObject(plumbing.NewHash(
+		"5c0e755dd85ac74584d9988cc361eccf02ce1a48"))
+	deps := map[string]interface{}{}
+	deps[core.DependencyCommit] = commit
+	id := fixtureIdentityDetector()
+	id.ExactSignatures = true
+	id.PeopleDict = map[string]int{
+		"vadim markovtsev <gmarkhor@gmail.com>": 0,
+		"vadim markovtsev <vadim@sourced.tech>": 1,
+	}
+	res, err := id.Consume(deps)
+	assert.Nil(t, err)
+	assert.Equal(t, res[DependencyAuthor].(int), 1)
+	commit, _ = test.Repository.CommitObject(plumbing.NewHash(
+		"8a03b5620b1caa72ec9cb847ea88332621e2950a"))
+	deps[core.DependencyCommit] = commit
+	res, err = id.Consume(deps)
+	assert.Nil(t, err)
+	assert.Equal(t, res[DependencyAuthor].(int), AuthorMissing)
+}
+
 func TestIdentityDetectorLoadPeopleDict(t *testing.T) {
 	id := fixtureIdentityDetector()
 	err := id.LoadPeopleDict(path.Join("..", "..", "test_data", "identities"))
@@ -175,22 +198,6 @@ func TestIdentityDetectorLoadPeopleDictWrongPath(t *testing.T) {
 	assert.NotNil(t, err)
 }
 
-/*
-// internal compiler error in 1.8
-func TestGeneratePeopleDict(t *testing.T) {
-	id := fixtureIdentityDetector()
-	commits := make([]*object.Commit, 0)
-	iter, err := test.Repository.CommitObjects()
-	for ; err != io.EOF; commit, err := iter.Next() {
-		if err != nil {
-			panic(err)
-		}
-		commits = append(commits, commit)
-	}
-	id.GeneratePeopleDict(commits)
-}
-*/
-
 func TestIdentityDetectorGeneratePeopleDict(t *testing.T) {
 	id := fixtureIdentityDetector()
 	commits := make([]*object.Commit, 0)
@@ -244,6 +251,27 @@ func TestIdentityDetectorGeneratePeopleDict(t *testing.T) {
 	assert.NotEqual(t, id.ReversedPeopleDict[len(id.ReversedPeopleDict)-1], AuthorMissingName)
 }
 
+func TestIdentityDetectorGeneratePeopleDictExact(t *testing.T) {
+	id := fixtureIdentityDetector()
+	id.ExactSignatures = true
+	commits := make([]*object.Commit, 0)
+	iter, err := test.Repository.CommitObjects()
+	commit, err := iter.Next()
+	for ; err != io.EOF; commit, err = iter.Next() {
+		if err != nil {
+			panic(err)
+		}
+		commits = append(commits, commit)
+	}
+	id.GeneratePeopleDict(commits)
+	ass := assert.New(t)
+	ass.Equal(len(id.PeopleDict), len(id.ReversedPeopleDict))
+	ass.True(len(id.ReversedPeopleDict) >= 24)
+	ass.Contains(id.PeopleDict, "vadim markovtsev <vadim@sourced.tech>")
+	ass.Contains(id.PeopleDict, "vadim markovtsev <vadim@athenian.co>")
+	ass.NotEqual(id.ReversedPeopleDict[len(id.ReversedPeopleDict)-1], AuthorMissingName)
+}
+
 func TestIdentityDetectorLoadPeopleDictInvalidPath(t *testing.T) {
 	id := fixtureIdentityDetector()
 	ipath := "/xxxyyyzzzInvalidPath!hehe"

+ 1 - 1
python/setup.py

@@ -22,7 +22,7 @@ setup(
     description="Python companion for github.com/src-d/hercules to visualize the results.",
     long_description=long_description,
     long_description_content_type="text/markdown",
-    version="10.7.0",
+    version="10.7.1",
     license="Apache-2.0",
     author="source{d}",
     author_email="machine-learning@sourced.tech",