Procházet zdrojové kódy

Merge pull request #243 from vmarkovtsev/master

Add TyposDatasetBuilder
Vadim Markovtsev před 6 roky
rodič
revize
c8fd37d1bc

+ 2 - 6
core.go

@@ -9,7 +9,8 @@ import (
 	"gopkg.in/src-d/hercules.v9/internal/plumbing/identity"
 	"gopkg.in/src-d/hercules.v9/internal/plumbing/uast"
 	"gopkg.in/src-d/hercules.v9/internal/yaml"
-	"gopkg.in/src-d/hercules.v9/leaves"
+	_ "gopkg.in/src-d/hercules.v9/leaves"          // add burndown and other analyses
+	_ "gopkg.in/src-d/hercules.v9/leaves/research" // add "research" analyses
 )
 
 // ConfigurationOptionType represents the possible types of a ConfigurationOption's value.
@@ -171,8 +172,3 @@ func PathifyFlagValue(flag *pflag.Flag) {
 func EnablePathFlagTypeMasquerade() {
 	core.EnablePathFlagTypeMasquerade()
 }
-
-func init() {
-	// hack to link with .leaves
-	_ = leaves.BurndownAnalysis{}
-}

+ 168 - 96
internal/pb/pb.pb.go

@@ -32,6 +32,8 @@ It has these top-level messages:
 	CommitFile
 	Commit
 	CommitsAnalysisResults
+	Typo
+	TyposDataset
 	AnalysisResults
 */
 package pb
@@ -819,6 +821,70 @@ func (m *CommitsAnalysisResults) GetAuthorIndex() []string {
 	return nil
 }
 
+type Typo struct {
+	Wrong   string `protobuf:"bytes,1,opt,name=wrong,proto3" json:"wrong,omitempty"`
+	Correct string `protobuf:"bytes,2,opt,name=correct,proto3" json:"correct,omitempty"`
+	Commit  string `protobuf:"bytes,3,opt,name=commit,proto3" json:"commit,omitempty"`
+	File    string `protobuf:"bytes,4,opt,name=file,proto3" json:"file,omitempty"`
+	Line    int32  `protobuf:"varint,5,opt,name=line,proto3" json:"line,omitempty"`
+}
+
+func (m *Typo) Reset()                    { *m = Typo{} }
+func (m *Typo) String() string            { return proto.CompactTextString(m) }
+func (*Typo) ProtoMessage()               {}
+func (*Typo) Descriptor() ([]byte, []int) { return fileDescriptorPb, []int{24} }
+
+func (m *Typo) GetWrong() string {
+	if m != nil {
+		return m.Wrong
+	}
+	return ""
+}
+
+func (m *Typo) GetCorrect() string {
+	if m != nil {
+		return m.Correct
+	}
+	return ""
+}
+
+func (m *Typo) GetCommit() string {
+	if m != nil {
+		return m.Commit
+	}
+	return ""
+}
+
+func (m *Typo) GetFile() string {
+	if m != nil {
+		return m.File
+	}
+	return ""
+}
+
+func (m *Typo) GetLine() int32 {
+	if m != nil {
+		return m.Line
+	}
+	return 0
+}
+
+type TyposDataset struct {
+	Typos []*Typo `protobuf:"bytes,1,rep,name=typos" json:"typos,omitempty"`
+}
+
+func (m *TyposDataset) Reset()                    { *m = TyposDataset{} }
+func (m *TyposDataset) String() string            { return proto.CompactTextString(m) }
+func (*TyposDataset) ProtoMessage()               {}
+func (*TyposDataset) Descriptor() ([]byte, []int) { return fileDescriptorPb, []int{25} }
+
+func (m *TyposDataset) GetTypos() []*Typo {
+	if m != nil {
+		return m.Typos
+	}
+	return nil
+}
+
 type AnalysisResults struct {
 	Header *Metadata `protobuf:"bytes,1,opt,name=header" json:"header,omitempty"`
 	// the mapped values are dynamic messages which require the second parsing pass.
@@ -828,7 +894,7 @@ type AnalysisResults struct {
 func (m *AnalysisResults) Reset()                    { *m = AnalysisResults{} }
 func (m *AnalysisResults) String() string            { return proto.CompactTextString(m) }
 func (*AnalysisResults) ProtoMessage()               {}
-func (*AnalysisResults) Descriptor() ([]byte, []int) { return fileDescriptorPb, []int{24} }
+func (*AnalysisResults) Descriptor() ([]byte, []int) { return fileDescriptorPb, []int{26} }
 
 func (m *AnalysisResults) GetHeader() *Metadata {
 	if m != nil {
@@ -869,105 +935,111 @@ func init() {
 	proto.RegisterType((*CommitFile)(nil), "CommitFile")
 	proto.RegisterType((*Commit)(nil), "Commit")
 	proto.RegisterType((*CommitsAnalysisResults)(nil), "CommitsAnalysisResults")
+	proto.RegisterType((*Typo)(nil), "Typo")
+	proto.RegisterType((*TyposDataset)(nil), "TyposDataset")
 	proto.RegisterType((*AnalysisResults)(nil), "AnalysisResults")
 }
 
 func init() { proto.RegisterFile("pb.proto", fileDescriptorPb) }
 
 var fileDescriptorPb = []byte{
-	// 1495 bytes of a gzipped FileDescriptorProto
-	0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x94, 0x57, 0xcb, 0x6e, 0xdb, 0x46,
-	0x17, 0x06, 0x75, 0xd7, 0x91, 0x2c, 0x27, 0x13, 0xff, 0x36, 0xa3, 0x1f, 0xce, 0xaf, 0x10, 0xfe,
-	0x03, 0xb7, 0x49, 0x99, 0xc0, 0xe9, 0x22, 0x4d, 0x37, 0xb1, 0xe5, 0x06, 0x31, 0x10, 0x37, 0x2d,
-	0x95, 0xa4, 0xbb, 0x08, 0x63, 0x71, 0x6c, 0xb1, 0x95, 0x86, 0xc4, 0x0c, 0x29, 0x59, 0x40, 0xfb,
-	0x2a, 0xdd, 0x75, 0xd1, 0x02, 0x5d, 0xf5, 0x05, 0xba, 0xe8, 0xa6, 0xdb, 0x3e, 0x44, 0x81, 0xbe,
-	0x45, 0x31, 0x37, 0x5e, 0x14, 0x3a, 0x4d, 0x77, 0x3c, 0xe7, 0x7c, 0x67, 0xe6, 0xcc, 0x77, 0x2e,
-	0x33, 0x84, 0x56, 0x74, 0xe6, 0x46, 0x2c, 0x8c, 0x43, 0xe7, 0xcf, 0x0a, 0xb4, 0x4e, 0x49, 0x8c,
-	0x7d, 0x1c, 0x63, 0x64, 0x43, 0x73, 0x41, 0x18, 0x0f, 0x42, 0x6a, 0x5b, 0x03, 0x6b, 0xbf, 0xee,
-	0x19, 0x11, 0x21, 0xa8, 0x4d, 0x31, 0x9f, 0xda, 0x95, 0x81, 0xb5, 0xdf, 0xf6, 0xe4, 0x37, 0xba,
-	0x05, 0xc0, 0x48, 0x14, 0xf2, 0x20, 0x0e, 0xd9, 0xca, 0xae, 0x4a, 0x4b, 0x4e, 0x83, 0xee, 0xc0,
-	0xe6, 0x19, 0xb9, 0x08, 0xe8, 0x38, 0xa1, 0xc1, 0xe5, 0x38, 0x0e, 0xe6, 0xc4, 0xae, 0x0d, 0xac,
-	0xfd, 0xaa, 0xb7, 0x21, 0xd5, 0xaf, 0x68, 0x70, 0xf9, 0x32, 0x98, 0x13, 0xe4, 0xc0, 0x06, 0xa1,
-	0x7e, 0x0e, 0x55, 0x97, 0xa8, 0x0e, 0xa1, 0x7e, 0x8a, 0xb1, 0xa1, 0x39, 0x09, 0xe7, 0xf3, 0x20,
-	0xe6, 0x76, 0x43, 0x45, 0xa6, 0x45, 0x74, 0x13, 0x5a, 0x2c, 0xa1, 0xca, 0xb1, 0x29, 0x1d, 0x9b,
-	0x2c, 0xa1, 0xd2, 0xe9, 0x19, 0x5c, 0x37, 0xa6, 0x71, 0x44, 0xd8, 0x38, 0x88, 0xc9, 0xdc, 0x6e,
-	0x0d, 0xaa, 0xfb, 0x9d, 0x83, 0x5d, 0xd7, 0x1c, 0xda, 0xf5, 0x14, 0xfa, 0x0b, 0xc2, 0x4e, 0x62,
-	0x32, 0xff, 0x8c, 0xc6, 0x6c, 0xe5, 0xf5, 0x58, 0x41, 0xd9, 0x3f, 0x84, 0x1b, 0x25, 0x30, 0x74,
-	0x0d, 0xaa, 0xdf, 0x90, 0x95, 0xe4, 0xaa, 0xed, 0x89, 0x4f, 0xb4, 0x05, 0xf5, 0x05, 0x9e, 0x25,
-	0x44, 0x12, 0x65, 0x79, 0x4a, 0x78, 0x5c, 0x79, 0x64, 0x39, 0x0f, 0x61, 0xe7, 0x28, 0x61, 0xd4,
-	0x0f, 0x97, 0x74, 0x14, 0x61, 0xc6, 0xc9, 0x29, 0x8e, 0x59, 0x70, 0xe9, 0x85, 0x4b, 0x75, 0xb8,
-	0x59, 0x32, 0xa7, 0xdc, 0xb6, 0x06, 0xd5, 0xfd, 0x0d, 0xcf, 0x88, 0xce, 0x4f, 0x16, 0x6c, 0x95,
-	0x79, 0x89, 0x7c, 0x50, 0x3c, 0x27, 0x7a, 0x6b, 0xf9, 0x8d, 0xf6, 0xa0, 0x47, 0x93, 0xf9, 0x19,
-	0x61, 0xe3, 0xf0, 0x7c, 0xcc, 0xc2, 0x25, 0x97, 0x41, 0xd4, 0xbd, 0xae, 0xd2, 0xbe, 0x38, 0xf7,
-	0xc2, 0x25, 0x47, 0x1f, 0xc2, 0xf5, 0x0c, 0x65, 0xb6, 0xad, 0x4a, 0xe0, 0xa6, 0x01, 0x0e, 0x95,
-	0x1a, 0xdd, 0x83, 0x9a, 0x5c, 0xa7, 0x26, 0x39, 0xb3, 0xdd, 0x2b, 0x0e, 0xe0, 0x49, 0x94, 0xf3,
-	0x2d, 0xf4, 0x9e, 0x06, 0x33, 0xc2, 0x5f, 0x2c, 0x29, 0x61, 0x7c, 0x1a, 0x44, 0xe8, 0x81, 0x61,
-	0xc3, 0x92, 0x0b, 0xf4, 0xdd, 0xa2, 0xdd, 0x7d, 0x2d, 0x8c, 0x8a, 0x71, 0x05, 0xec, 0x3f, 0x02,
-	0xc8, 0x94, 0x79, 0x7e, 0xeb, 0x25, 0xfc, 0xd6, 0xf3, 0xfc, 0xfe, 0x55, 0xc9, 0x08, 0x3e, 0xa4,
-	0x78, 0xb6, 0xe2, 0x01, 0xf7, 0x08, 0x4f, 0x66, 0x31, 0x47, 0x03, 0xe8, 0x5c, 0x30, 0x4c, 0x93,
-	0x19, 0x66, 0x41, 0x6c, 0xd6, 0xcb, 0xab, 0x50, 0x1f, 0x5a, 0x1c, 0xcf, 0xa3, 0x59, 0x40, 0x2f,
-	0xf4, 0xd2, 0xa9, 0x8c, 0xee, 0x43, 0x33, 0x62, 0xe1, 0xd7, 0x64, 0x12, 0x4b, 0x9e, 0x3a, 0x07,
-	0xff, 0x29, 0x27, 0xc2, 0xa0, 0xd0, 0x5d, 0xa8, 0x9f, 0x8b, 0x83, 0x6a, 0xde, 0xae, 0x80, 0x2b,
-	0x0c, 0xfa, 0x08, 0x1a, 0x11, 0x09, 0xa3, 0x99, 0x28, 0xfb, 0x77, 0xa0, 0x35, 0x08, 0x9d, 0x00,
-	0x52, 0x5f, 0xe3, 0x80, 0xc6, 0x84, 0xe1, 0x49, 0x2c, 0xba, 0xb5, 0x21, 0xe3, 0xea, 0xbb, 0xc3,
-	0x70, 0x1e, 0x31, 0xc2, 0x39, 0xf1, 0x95, 0xb3, 0x17, 0x2e, 0xb5, 0xff, 0x75, 0xe5, 0x75, 0x92,
-	0x39, 0xa1, 0x47, 0xb0, 0x29, 0x43, 0x18, 0x87, 0x26, 0x21, 0x76, 0x53, 0x86, 0xb0, 0xb9, 0x96,
-	0x27, 0xaf, 0x77, 0x5e, 0x90, 0x9d, 0x5f, 0x2c, 0xb8, 0x79, 0xe5, 0x56, 0x25, 0x75, 0x68, 0xbd,
-	0x6f, 0x1d, 0x56, 0xca, 0xeb, 0x10, 0x41, 0x4d, 0xb4, 0xaa, 0x5d, 0x1d, 0x54, 0xf7, 0xab, 0x5e,
-	0xcd, 0xcc, 0xaa, 0x80, 0xfa, 0xc1, 0x44, 0xd3, 0x5c, 0xf7, 0x8c, 0x88, 0xb6, 0xa1, 0x11, 0x50,
-	0x3f, 0x8a, 0x99, 0x64, 0xb4, 0xea, 0x69, 0xc9, 0x19, 0x41, 0x73, 0x18, 0x26, 0x91, 0x20, 0x7d,
-	0x0b, 0xea, 0x01, 0xf5, 0xc9, 0xa5, 0x2c, 0xcc, 0xb6, 0xa7, 0x04, 0x74, 0x00, 0x8d, 0xb9, 0x3c,
-	0x82, 0x8c, 0xe3, 0xdd, 0x7c, 0x6a, 0xa4, 0xb3, 0x07, 0xdd, 0x97, 0x61, 0x32, 0x99, 0x12, 0x5f,
-	0x72, 0x26, 0x56, 0x56, 0xb9, 0xb7, 0x64, 0x50, 0x4a, 0x70, 0x7e, 0xb7, 0x60, 0x5b, 0xef, 0xbd,
-	0x5e, 0x9b, 0x77, 0xa1, 0x2b, 0x30, 0xe3, 0x89, 0x32, 0xeb, 0x54, 0xb6, 0x5c, 0x0d, 0xf7, 0x3a,
-	0xc2, 0x6a, 0xe2, 0xbe, 0x0f, 0x3d, 0x9d, 0x7d, 0x03, 0x6f, 0xae, 0xc1, 0x37, 0x94, 0xdd, 0x38,
-	0x3c, 0x80, 0xae, 0x76, 0x50, 0x51, 0xa9, 0xe9, 0xb7, 0xe1, 0xe6, 0x63, 0xf6, 0x3a, 0x0a, 0xa2,
-	0x0e, 0xf0, 0x3f, 0xe8, 0xa8, 0xaa, 0x98, 0x05, 0x94, 0x70, 0xbb, 0x2d, 0x8f, 0x01, 0x52, 0xf5,
-	0x5c, 0x68, 0x9c, 0x1f, 0x2c, 0x80, 0x57, 0x87, 0xa3, 0x97, 0xc3, 0x29, 0xa6, 0x17, 0x04, 0xfd,
-	0x17, 0xda, 0x32, 0xfe, 0xdc, 0x38, 0x6a, 0x09, 0xc5, 0xe7, 0x62, 0x24, 0xed, 0x02, 0x70, 0x36,
-	0x19, 0x9f, 0x91, 0xf3, 0x90, 0x11, 0x7d, 0x79, 0xb4, 0x39, 0x9b, 0x1c, 0x49, 0x85, 0xf0, 0x15,
-	0x66, 0x7c, 0x1e, 0x13, 0xa6, 0x2f, 0x90, 0x16, 0x67, 0x93, 0x43, 0x21, 0x8b, 0x40, 0x12, 0xcc,
-	0x63, 0xe3, 0x5c, 0x53, 0xf7, 0x8b, 0x50, 0x69, 0xef, 0x5d, 0x90, 0x92, 0x76, 0xaf, 0xab, 0xc5,
-	0x85, 0x46, 0xfa, 0x3b, 0x4f, 0x60, 0x27, 0x0b, 0x93, 0x8f, 0xf0, 0x82, 0x30, 0xc3, 0xf9, 0xff,
-	0xa1, 0x39, 0x51, 0x6a, 0x3d, 0x99, 0x3a, 0x6e, 0x06, 0xf5, 0x8c, 0xcd, 0xf9, 0xcd, 0x82, 0xde,
-	0x68, 0x1a, 0xc6, 0x94, 0x70, 0xee, 0x91, 0x49, 0xc8, 0x7c, 0x51, 0x89, 0xf1, 0x2a, 0x4a, 0xe7,
-	0xae, 0xf8, 0x4e, 0x67, 0x71, 0x25, 0x37, 0x8b, 0x11, 0xd4, 0x04, 0x09, 0xfa, 0x50, 0xf2, 0x1b,
-	0x7d, 0x02, 0xad, 0x49, 0x98, 0x88, 0x06, 0x34, 0x93, 0x61, 0xd7, 0x2d, 0x2e, 0x2f, 0xb2, 0x28,
-	0xed, 0x6a, 0x26, 0xa6, 0xf0, 0xfe, 0xa7, 0xb0, 0x51, 0x30, 0xfd, 0xab, 0xc9, 0x78, 0x0c, 0x3b,
-	0x66, 0x9b, 0xf5, 0xe2, 0xfb, 0x00, 0x9a, 0x4c, 0xee, 0x6c, 0x88, 0xd8, 0x5c, 0x8b, 0xc8, 0x33,
-	0x76, 0xe7, 0x0f, 0x0b, 0x3a, 0xa2, 0x42, 0x9e, 0x05, 0x5c, 0xde, 0xee, 0xb9, 0x1b, 0x59, 0x35,
-	0x51, 0x7a, 0x23, 0xbf, 0x86, 0x2d, 0xcd, 0xe0, 0xf8, 0x6c, 0x35, 0xf6, 0xc9, 0x82, 0xcc, 0xc2,
-	0x88, 0x30, 0xbb, 0x22, 0x77, 0xd8, 0x73, 0x73, 0xab, 0xb8, 0x3a, 0x3b, 0x47, 0xab, 0x63, 0x03,
-	0x53, 0x47, 0x47, 0x93, 0xb7, 0x0c, 0xfd, 0x2f, 0x61, 0xe7, 0x0a, 0x78, 0x09, 0x1d, 0x83, 0x3c,
-	0x1d, 0x9d, 0x03, 0x70, 0x45, 0xf1, 0x8e, 0x62, 0x1c, 0xf3, 0x3c, 0x35, 0xdf, 0x5b, 0x60, 0xe7,
-	0xc2, 0x51, 0xb4, 0x9c, 0x12, 0xce, 0xf1, 0x05, 0x41, 0x8f, 0xf3, 0xad, 0xbc, 0x16, 0x78, 0x01,
-	0xa9, 0xc6, 0xa5, 0xbe, 0xc7, 0xa4, 0x4b, 0xff, 0x29, 0x40, 0xa6, 0x2c, 0x79, 0x27, 0x38, 0xc5,
-	0xf0, 0xba, 0x85, 0xb5, 0x73, 0x01, 0xbe, 0x82, 0x76, 0x1a, 0xb8, 0x48, 0x31, 0xf6, 0x7d, 0xe2,
-	0xeb, 0x73, 0x2a, 0x41, 0x24, 0x82, 0x91, 0x79, 0xb8, 0x20, 0xbe, 0x4e, 0xbd, 0x11, 0x65, 0x8a,
-	0x24, 0x61, 0xbe, 0xbe, 0xe0, 0x8d, 0x28, 0x2a, 0xbb, 0x71, 0x4c, 0x16, 0xc7, 0x78, 0x2d, 0x8f,
-	0x85, 0x97, 0xd5, 0x00, 0xea, 0x5c, 0xec, 0x5b, 0x46, 0xa1, 0x34, 0xa0, 0x8f, 0xa1, 0x3d, 0xc3,
-	0xf4, 0x22, 0xc1, 0xa2, 0x93, 0xaa, 0x92, 0xa5, 0x6d, 0x57, 0xad, 0xeb, 0x3e, 0x37, 0x06, 0xc5,
-	0x4b, 0x06, 0xec, 0x3f, 0x83, 0x5e, 0xd1, 0x58, 0xc2, 0xcf, 0xfb, 0xa5, 0x8f, 0x43, 0xf3, 0x18,
-	0x8b, 0x5a, 0xe0, 0xe8, 0x0e, 0xd4, 0x7c, 0xb2, 0x30, 0xb9, 0x42, 0xae, 0xd6, 0x8b, 0x68, 0x74,
-	0x04, 0xd2, 0xde, 0x7f, 0x02, 0xed, 0x54, 0x55, 0x52, 0x36, 0xbb, 0xc5, 0x7d, 0x9b, 0xfa, 0x34,
-	0xf9, 0x4d, 0x7f, 0xb4, 0xe0, 0x86, 0x58, 0x62, 0xbd, 0x97, 0x0e, 0xc4, 0x25, 0xb5, 0x32, 0x11,
-	0xdc, 0x72, 0x4b, 0x30, 0x22, 0xaa, 0x34, 0x1a, 0xbc, 0xe2, 0x62, 0x00, 0xfa, 0x64, 0x31, 0x56,
-	0x77, 0x51, 0x45, 0xb6, 0x51, 0xcb, 0x27, 0x8b, 0x13, 0x21, 0xf7, 0x0f, 0xa1, 0x9d, 0xe2, 0x4b,
-	0x42, 0xbd, 0x55, 0x0c, 0xb5, 0x65, 0x8e, 0x9c, 0x8f, 0xf5, 0x2b, 0x68, 0x8f, 0x08, 0x15, 0x0f,
-	0x60, 0x1a, 0x67, 0x13, 0x42, 0x2c, 0x52, 0xd1, 0x30, 0xf1, 0xf2, 0x11, 0x09, 0x27, 0x54, 0x26,
-	0x5a, 0x46, 0x60, 0xe4, 0x7c, 0x6d, 0x54, 0x0b, 0x3d, 0xee, 0xfc, 0x6a, 0xc1, 0xce, 0x50, 0xc1,
-	0xd2, 0x0d, 0x0c, 0x11, 0xaf, 0xe1, 0x1a, 0x37, 0x3a, 0x39, 0x01, 0xf0, 0x4a, 0x93, 0x72, 0xcf,
-	0xbd, 0xc2, 0xc7, 0x4d, 0x15, 0x47, 0xab, 0x63, 0xbc, 0xd2, 0x8f, 0x70, 0x5e, 0x50, 0xf6, 0x4f,
-	0xe1, 0x46, 0x09, 0xec, 0x7d, 0x7a, 0x3f, 0xdb, 0x2e, 0xc7, 0xcd, 0x1b, 0x80, 0xa1, 0x3c, 0x8d,
-	0x68, 0xbd, 0xd2, 0x07, 0x75, 0x1f, 0x5a, 0xa6, 0x6a, 0xcd, 0xed, 0x64, 0xe4, 0xac, 0x39, 0x6a,
-	0x57, 0x34, 0x87, 0xf3, 0x1d, 0x34, 0xd4, 0xfa, 0xe9, 0xcf, 0x93, 0x95, 0xfb, 0x79, 0xda, 0x83,
-	0xde, 0x72, 0x4a, 0xf2, 0xff, 0x46, 0x15, 0xf9, 0xf3, 0xd2, 0x15, 0xda, 0xf4, 0xb7, 0x67, 0x1b,
-	0x1a, 0x38, 0x89, 0xa7, 0x21, 0xd3, 0x0d, 0xac, 0x25, 0x74, 0xbb, 0xf8, 0xc2, 0xec, 0xb8, 0xd9,
-	0x49, 0xcc, 0x93, 0xe3, 0x8d, 0x78, 0x71, 0xc8, 0x64, 0xad, 0x17, 0xea, 0xed, 0xe2, 0xe4, 0x16,
-	0x55, 0xae, 0x90, 0x59, 0xeb, 0xdf, 0x86, 0xae, 0xda, 0xa9, 0x50, 0x9a, 0x1d, 0xa5, 0x93, 0xd5,
-	0xe9, 0xfc, 0x6c, 0xc1, 0xe6, 0xdb, 0x2b, 0x37, 0xa6, 0x04, 0xfb, 0x84, 0xc9, 0xa3, 0x76, 0x0e,
-	0xda, 0xe9, 0x5f, 0x96, 0xa7, 0x0d, 0xe8, 0xb1, 0x28, 0x37, 0x1a, 0xa7, 0xe5, 0x26, 0x3a, 0x65,
-	0xbd, 0x4b, 0x86, 0x1a, 0x90, 0xde, 0x82, 0x4a, 0x54, 0xb7, 0x60, 0xce, 0xf4, 0x4f, 0xff, 0x5f,
-	0xdd, 0x5c, 0xba, 0xcf, 0x1a, 0xf2, 0x7f, 0xf7, 0xe1, 0xdf, 0x01, 0x00, 0x00, 0xff, 0xff, 0x2e,
-	0x80, 0xba, 0x8e, 0xfb, 0x0e, 0x00, 0x00,
+	// 1568 bytes of a gzipped FileDescriptorProto
+	0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x94, 0x57, 0xcd, 0x6e, 0xdb, 0xc6,
+	0x13, 0x07, 0x25, 0x51, 0x1f, 0x23, 0x59, 0x4e, 0x36, 0xfe, 0xdb, 0x8c, 0x02, 0xe7, 0xaf, 0x10,
+	0x6e, 0xe0, 0x36, 0x29, 0x13, 0x38, 0x3d, 0xa4, 0xe9, 0x25, 0xfe, 0x68, 0x10, 0x03, 0x71, 0xd3,
+	0xd2, 0x4e, 0x7a, 0x8b, 0xb0, 0x16, 0xd7, 0x12, 0x5b, 0x69, 0x49, 0xec, 0x92, 0x92, 0x05, 0xb4,
+	0xaf, 0xd2, 0x5b, 0x0f, 0x2d, 0xd0, 0x53, 0x5f, 0xa0, 0x87, 0x5e, 0x7a, 0xed, 0x43, 0x14, 0xe8,
+	0x5b, 0x14, 0xfb, 0x45, 0x91, 0x0a, 0x9d, 0xa6, 0x37, 0xce, 0xcc, 0x6f, 0x77, 0x67, 0x7e, 0x33,
+	0xb3, 0xb3, 0x84, 0x66, 0x7c, 0xee, 0xc5, 0x2c, 0x4a, 0x22, 0xf7, 0xaf, 0x0a, 0x34, 0x4f, 0x48,
+	0x82, 0x03, 0x9c, 0x60, 0xe4, 0x40, 0x63, 0x46, 0x18, 0x0f, 0x23, 0xea, 0x58, 0x7d, 0x6b, 0xd7,
+	0xf6, 0x8d, 0x88, 0x10, 0xd4, 0xc6, 0x98, 0x8f, 0x9d, 0x4a, 0xdf, 0xda, 0x6d, 0xf9, 0xf2, 0x1b,
+	0xdd, 0x06, 0x60, 0x24, 0x8e, 0x78, 0x98, 0x44, 0x6c, 0xe1, 0x54, 0xa5, 0x25, 0xa7, 0x41, 0x77,
+	0x61, 0xfd, 0x9c, 0x8c, 0x42, 0x3a, 0x48, 0x69, 0x78, 0x39, 0x48, 0xc2, 0x29, 0x71, 0x6a, 0x7d,
+	0x6b, 0xb7, 0xea, 0xaf, 0x49, 0xf5, 0x2b, 0x1a, 0x5e, 0x9e, 0x85, 0x53, 0x82, 0x5c, 0x58, 0x23,
+	0x34, 0xc8, 0xa1, 0x6c, 0x89, 0x6a, 0x13, 0x1a, 0x64, 0x18, 0x07, 0x1a, 0xc3, 0x68, 0x3a, 0x0d,
+	0x13, 0xee, 0xd4, 0x95, 0x67, 0x5a, 0x44, 0x37, 0xa1, 0xc9, 0x52, 0xaa, 0x16, 0x36, 0xe4, 0xc2,
+	0x06, 0x4b, 0xa9, 0x5c, 0xf4, 0x1c, 0xae, 0x1b, 0xd3, 0x20, 0x26, 0x6c, 0x10, 0x26, 0x64, 0xea,
+	0x34, 0xfb, 0xd5, 0xdd, 0xf6, 0xde, 0xb6, 0x67, 0x82, 0xf6, 0x7c, 0x85, 0xfe, 0x92, 0xb0, 0xe3,
+	0x84, 0x4c, 0x3f, 0xa7, 0x09, 0x5b, 0xf8, 0x5d, 0x56, 0x50, 0xf6, 0xf6, 0xe1, 0x46, 0x09, 0x0c,
+	0x5d, 0x83, 0xea, 0xb7, 0x64, 0x21, 0xb9, 0x6a, 0xf9, 0xe2, 0x13, 0x6d, 0x80, 0x3d, 0xc3, 0x93,
+	0x94, 0x48, 0xa2, 0x2c, 0x5f, 0x09, 0x4f, 0x2a, 0x8f, 0x2d, 0xf7, 0x11, 0x6c, 0x1d, 0xa4, 0x8c,
+	0x06, 0xd1, 0x9c, 0x9e, 0xc6, 0x98, 0x71, 0x72, 0x82, 0x13, 0x16, 0x5e, 0xfa, 0xd1, 0x5c, 0x05,
+	0x37, 0x49, 0xa7, 0x94, 0x3b, 0x56, 0xbf, 0xba, 0xbb, 0xe6, 0x1b, 0xd1, 0xfd, 0xd9, 0x82, 0x8d,
+	0xb2, 0x55, 0x22, 0x1f, 0x14, 0x4f, 0x89, 0x3e, 0x5a, 0x7e, 0xa3, 0x1d, 0xe8, 0xd2, 0x74, 0x7a,
+	0x4e, 0xd8, 0x20, 0xba, 0x18, 0xb0, 0x68, 0xce, 0xa5, 0x13, 0xb6, 0xdf, 0x51, 0xda, 0x97, 0x17,
+	0x7e, 0x34, 0xe7, 0xe8, 0x23, 0xb8, 0xbe, 0x44, 0x99, 0x63, 0xab, 0x12, 0xb8, 0x6e, 0x80, 0x87,
+	0x4a, 0x8d, 0xee, 0x43, 0x4d, 0xee, 0x53, 0x93, 0x9c, 0x39, 0xde, 0x15, 0x01, 0xf8, 0x12, 0xe5,
+	0x7e, 0x07, 0xdd, 0x67, 0xe1, 0x84, 0xf0, 0x97, 0x73, 0x4a, 0x18, 0x1f, 0x87, 0x31, 0x7a, 0x68,
+	0xd8, 0xb0, 0xe4, 0x06, 0x3d, 0xaf, 0x68, 0xf7, 0x5e, 0x0b, 0xa3, 0x62, 0x5c, 0x01, 0x7b, 0x8f,
+	0x01, 0x96, 0xca, 0x3c, 0xbf, 0x76, 0x09, 0xbf, 0x76, 0x9e, 0xdf, 0xbf, 0x2b, 0x4b, 0x82, 0xf7,
+	0x29, 0x9e, 0x2c, 0x78, 0xc8, 0x7d, 0xc2, 0xd3, 0x49, 0xc2, 0x51, 0x1f, 0xda, 0x23, 0x86, 0x69,
+	0x3a, 0xc1, 0x2c, 0x4c, 0xcc, 0x7e, 0x79, 0x15, 0xea, 0x41, 0x93, 0xe3, 0x69, 0x3c, 0x09, 0xe9,
+	0x48, 0x6f, 0x9d, 0xc9, 0xe8, 0x01, 0x34, 0x62, 0x16, 0x7d, 0x43, 0x86, 0x89, 0xe4, 0xa9, 0xbd,
+	0xf7, 0xbf, 0x72, 0x22, 0x0c, 0x0a, 0xdd, 0x03, 0xfb, 0x42, 0x04, 0xaa, 0x79, 0xbb, 0x02, 0xae,
+	0x30, 0xe8, 0x63, 0xa8, 0xc7, 0x24, 0x8a, 0x27, 0xa2, 0xec, 0xdf, 0x81, 0xd6, 0x20, 0x74, 0x0c,
+	0x48, 0x7d, 0x0d, 0x42, 0x9a, 0x10, 0x86, 0x87, 0x89, 0xe8, 0xd6, 0xba, 0xf4, 0xab, 0xe7, 0x1d,
+	0x46, 0xd3, 0x98, 0x11, 0xce, 0x49, 0xa0, 0x16, 0xfb, 0xd1, 0x5c, 0xaf, 0xbf, 0xae, 0x56, 0x1d,
+	0x2f, 0x17, 0xa1, 0xc7, 0xb0, 0x2e, 0x5d, 0x18, 0x44, 0x26, 0x21, 0x4e, 0x43, 0xba, 0xb0, 0xbe,
+	0x92, 0x27, 0xbf, 0x7b, 0x51, 0x90, 0xdd, 0x5f, 0x2d, 0xb8, 0x79, 0xe5, 0x51, 0x25, 0x75, 0x68,
+	0xbd, 0x6f, 0x1d, 0x56, 0xca, 0xeb, 0x10, 0x41, 0x4d, 0xb4, 0xaa, 0x53, 0xed, 0x57, 0x77, 0xab,
+	0x7e, 0xcd, 0xdc, 0x55, 0x21, 0x0d, 0xc2, 0xa1, 0xa6, 0xd9, 0xf6, 0x8d, 0x88, 0x36, 0xa1, 0x1e,
+	0xd2, 0x20, 0x4e, 0x98, 0x64, 0xb4, 0xea, 0x6b, 0xc9, 0x3d, 0x85, 0xc6, 0x61, 0x94, 0xc6, 0x82,
+	0xf4, 0x0d, 0xb0, 0x43, 0x1a, 0x90, 0x4b, 0x59, 0x98, 0x2d, 0x5f, 0x09, 0x68, 0x0f, 0xea, 0x53,
+	0x19, 0x82, 0xf4, 0xe3, 0xdd, 0x7c, 0x6a, 0xa4, 0xbb, 0x03, 0x9d, 0xb3, 0x28, 0x1d, 0x8e, 0x49,
+	0x20, 0x39, 0x13, 0x3b, 0xab, 0xdc, 0x5b, 0xd2, 0x29, 0x25, 0xb8, 0x7f, 0x58, 0xb0, 0xa9, 0xcf,
+	0x5e, 0xad, 0xcd, 0x7b, 0xd0, 0x11, 0x98, 0xc1, 0x50, 0x99, 0x75, 0x2a, 0x9b, 0x9e, 0x86, 0xfb,
+	0x6d, 0x61, 0x35, 0x7e, 0x3f, 0x80, 0xae, 0xce, 0xbe, 0x81, 0x37, 0x56, 0xe0, 0x6b, 0xca, 0x6e,
+	0x16, 0x3c, 0x84, 0x8e, 0x5e, 0xa0, 0xbc, 0x52, 0xb7, 0xdf, 0x9a, 0x97, 0xf7, 0xd9, 0x6f, 0x2b,
+	0x88, 0x0a, 0xe0, 0xff, 0xd0, 0x56, 0x55, 0x31, 0x09, 0x29, 0xe1, 0x4e, 0x4b, 0x86, 0x01, 0x52,
+	0xf5, 0x42, 0x68, 0xdc, 0x1f, 0x2d, 0x80, 0x57, 0xfb, 0xa7, 0x67, 0x87, 0x63, 0x4c, 0x47, 0x04,
+	0xdd, 0x82, 0x96, 0xf4, 0x3f, 0x77, 0x1d, 0x35, 0x85, 0xe2, 0x0b, 0x71, 0x25, 0x6d, 0x03, 0x70,
+	0x36, 0x1c, 0x9c, 0x93, 0x8b, 0x88, 0x11, 0x3d, 0x3c, 0x5a, 0x9c, 0x0d, 0x0f, 0xa4, 0x42, 0xac,
+	0x15, 0x66, 0x7c, 0x91, 0x10, 0xa6, 0x07, 0x48, 0x93, 0xb3, 0xe1, 0xbe, 0x90, 0x85, 0x23, 0x29,
+	0xe6, 0x89, 0x59, 0x5c, 0x53, 0xf3, 0x45, 0xa8, 0xf4, 0xea, 0x6d, 0x90, 0x92, 0x5e, 0x6e, 0xab,
+	0xcd, 0x85, 0x46, 0xae, 0x77, 0x9f, 0xc2, 0xd6, 0xd2, 0x4d, 0x7e, 0x8a, 0x67, 0x84, 0x19, 0xce,
+	0x3f, 0x80, 0xc6, 0x50, 0xa9, 0xf5, 0xcd, 0xd4, 0xf6, 0x96, 0x50, 0xdf, 0xd8, 0xdc, 0xdf, 0x2d,
+	0xe8, 0x9e, 0x8e, 0xa3, 0x84, 0x12, 0xce, 0x7d, 0x32, 0x8c, 0x58, 0x20, 0x2a, 0x31, 0x59, 0xc4,
+	0xd9, 0xbd, 0x2b, 0xbe, 0xb3, 0xbb, 0xb8, 0x92, 0xbb, 0x8b, 0x11, 0xd4, 0x04, 0x09, 0x3a, 0x28,
+	0xf9, 0x8d, 0x3e, 0x85, 0xe6, 0x30, 0x4a, 0x45, 0x03, 0x9a, 0x9b, 0x61, 0xdb, 0x2b, 0x6e, 0x2f,
+	0xb2, 0x28, 0xed, 0xea, 0x4e, 0xcc, 0xe0, 0xbd, 0xcf, 0x60, 0xad, 0x60, 0xfa, 0x4f, 0x37, 0xe3,
+	0x11, 0x6c, 0x99, 0x63, 0x56, 0x8b, 0xef, 0x43, 0x68, 0x30, 0x79, 0xb2, 0x21, 0x62, 0x7d, 0xc5,
+	0x23, 0xdf, 0xd8, 0xdd, 0x3f, 0x2d, 0x68, 0x8b, 0x0a, 0x79, 0x1e, 0x72, 0x39, 0xdd, 0x73, 0x13,
+	0x59, 0x35, 0x51, 0x36, 0x91, 0x5f, 0xc3, 0x86, 0x66, 0x70, 0x70, 0xbe, 0x18, 0x04, 0x64, 0x46,
+	0x26, 0x51, 0x4c, 0x98, 0x53, 0x91, 0x27, 0xec, 0x78, 0xb9, 0x5d, 0x3c, 0x9d, 0x9d, 0x83, 0xc5,
+	0x91, 0x81, 0xa9, 0xd0, 0xd1, 0xf0, 0x2d, 0x43, 0xef, 0x2b, 0xd8, 0xba, 0x02, 0x5e, 0x42, 0x47,
+	0x3f, 0x4f, 0x47, 0x7b, 0x0f, 0x3c, 0x51, 0xbc, 0xa7, 0x09, 0x4e, 0x78, 0x9e, 0x9a, 0x1f, 0x2c,
+	0x70, 0x72, 0xee, 0x28, 0x5a, 0x4e, 0x08, 0xe7, 0x78, 0x44, 0xd0, 0x93, 0x7c, 0x2b, 0xaf, 0x38,
+	0x5e, 0x40, 0xaa, 0xeb, 0x52, 0xcf, 0x31, 0xb9, 0xa4, 0xf7, 0x0c, 0x60, 0xa9, 0x2c, 0x79, 0x27,
+	0xb8, 0x45, 0xf7, 0x3a, 0x85, 0xbd, 0x73, 0x0e, 0xbe, 0x82, 0x56, 0xe6, 0xb8, 0x48, 0x31, 0x0e,
+	0x02, 0x12, 0xe8, 0x38, 0x95, 0x20, 0x12, 0xc1, 0xc8, 0x34, 0x9a, 0x91, 0x40, 0xa7, 0xde, 0x88,
+	0x32, 0x45, 0x92, 0xb0, 0x40, 0x0f, 0x78, 0x23, 0x8a, 0xca, 0xae, 0x1f, 0x91, 0xd9, 0x11, 0x5e,
+	0xc9, 0x63, 0xe1, 0x65, 0xd5, 0x07, 0x9b, 0x8b, 0x73, 0xcb, 0x28, 0x94, 0x06, 0xf4, 0x09, 0xb4,
+	0x26, 0x98, 0x8e, 0x52, 0x2c, 0x3a, 0xa9, 0x2a, 0x59, 0xda, 0xf4, 0xd4, 0xbe, 0xde, 0x0b, 0x63,
+	0x50, 0xbc, 0x2c, 0x81, 0xbd, 0xe7, 0xd0, 0x2d, 0x1a, 0x4b, 0xf8, 0x79, 0xbf, 0xf4, 0x71, 0x68,
+	0x1c, 0x61, 0x51, 0x0b, 0x1c, 0xdd, 0x85, 0x5a, 0x40, 0x66, 0x26, 0x57, 0xc8, 0xd3, 0x7a, 0xe1,
+	0x8d, 0xf6, 0x40, 0xda, 0x7b, 0x4f, 0xa1, 0x95, 0xa9, 0x4a, 0xca, 0x66, 0xbb, 0x78, 0x6e, 0x43,
+	0x47, 0x93, 0x3f, 0xf4, 0x27, 0x0b, 0x6e, 0x88, 0x2d, 0x56, 0x7b, 0x69, 0x4f, 0x0c, 0xa9, 0x85,
+	0xf1, 0xe0, 0xb6, 0x57, 0x82, 0x11, 0x5e, 0x65, 0xde, 0xe0, 0x05, 0x17, 0x17, 0x60, 0x40, 0x66,
+	0x03, 0x35, 0x8b, 0x2a, 0xb2, 0x8d, 0x9a, 0x01, 0x99, 0x1d, 0x0b, 0xb9, 0xb7, 0x0f, 0xad, 0x0c,
+	0x5f, 0xe2, 0xea, 0xed, 0xa2, 0xab, 0x4d, 0x13, 0x72, 0xde, 0xd7, 0xaf, 0xa1, 0x75, 0x4a, 0xa8,
+	0x78, 0x00, 0xd3, 0x64, 0x79, 0x43, 0x88, 0x4d, 0x2a, 0x1a, 0x26, 0x5e, 0x3e, 0x22, 0xe1, 0x84,
+	0xca, 0x44, 0x4b, 0x0f, 0x8c, 0x9c, 0xaf, 0x8d, 0x6a, 0xa1, 0xc7, 0xdd, 0xdf, 0x2c, 0xd8, 0x3a,
+	0x54, 0xb0, 0xec, 0x00, 0x43, 0xc4, 0x6b, 0xb8, 0xc6, 0x8d, 0x4e, 0xde, 0x00, 0x78, 0xa1, 0x49,
+	0xb9, 0xef, 0x5d, 0xb1, 0xc6, 0xcb, 0x14, 0x07, 0x8b, 0x23, 0xbc, 0xd0, 0x8f, 0x70, 0x5e, 0x50,
+	0xf6, 0x4e, 0xe0, 0x46, 0x09, 0xec, 0x7d, 0x7a, 0x7f, 0x79, 0x5c, 0x8e, 0x9b, 0x37, 0x00, 0x87,
+	0x32, 0x1a, 0xd1, 0x7a, 0xa5, 0x0f, 0xea, 0x1e, 0x34, 0x4d, 0xd5, 0x9a, 0xe9, 0x64, 0xe4, 0x65,
+	0x73, 0xd4, 0xae, 0x68, 0x0e, 0xf7, 0x7b, 0xa8, 0xab, 0xfd, 0xb3, 0x9f, 0x27, 0x2b, 0xf7, 0xf3,
+	0xb4, 0x03, 0xdd, 0xf9, 0x98, 0xe4, 0xff, 0x8d, 0x2a, 0xf2, 0xe7, 0xa5, 0x23, 0xb4, 0xd9, 0x6f,
+	0xcf, 0x26, 0xd4, 0x71, 0x9a, 0x8c, 0x23, 0xa6, 0x1b, 0x58, 0x4b, 0xe8, 0x4e, 0xf1, 0x85, 0xd9,
+	0xf6, 0x96, 0x91, 0x98, 0x27, 0xc7, 0x1b, 0xf1, 0xe2, 0x90, 0xc9, 0x5a, 0x2d, 0xd4, 0x3b, 0xc5,
+	0x9b, 0x5b, 0x54, 0xb9, 0x42, 0x2e, 0x5b, 0xff, 0x0e, 0x74, 0xd4, 0x49, 0x85, 0xd2, 0x6c, 0x2b,
+	0x9d, 0xac, 0x4e, 0x77, 0x06, 0xb5, 0xb3, 0x45, 0x1c, 0x89, 0xaa, 0x9a, 0xb3, 0x88, 0x8e, 0x74,
+	0x74, 0x4a, 0x50, 0x95, 0xc3, 0x98, 0x78, 0x33, 0xab, 0xb1, 0x68, 0x44, 0x11, 0x92, 0x3a, 0x45,
+	0x53, 0xaa, 0xa5, 0x6c, 0x62, 0xd6, 0x72, 0x13, 0x13, 0x41, 0x4d, 0xbc, 0x42, 0xe4, 0x6c, 0xb7,
+	0x7d, 0xf9, 0xed, 0xde, 0x83, 0x8e, 0x38, 0x97, 0x1f, 0xe1, 0x04, 0x73, 0x92, 0xa0, 0x5b, 0x60,
+	0x27, 0x42, 0xd6, 0xb1, 0xd8, 0x9e, 0xb0, 0xfa, 0x4a, 0xe7, 0xfe, 0x62, 0xc1, 0xfa, 0xdb, 0xe1,
+	0xd7, 0xc7, 0x04, 0x07, 0x84, 0x49, 0x8f, 0xdb, 0x7b, 0xad, 0xec, 0x57, 0xd0, 0xd7, 0x06, 0xf4,
+	0x44, 0xf4, 0x04, 0x4d, 0xb2, 0x9e, 0x10, 0xed, 0xbc, 0xda, 0xca, 0x87, 0x1a, 0x90, 0x8d, 0x6a,
+	0x25, 0xaa, 0x51, 0x9d, 0x33, 0xfd, 0xdb, 0x4f, 0x62, 0x27, 0x57, 0x93, 0xe7, 0x75, 0xf9, 0x53,
+	0xfe, 0xe8, 0x9f, 0x00, 0x00, 0x00, 0xff, 0xff, 0x1e, 0xab, 0x56, 0x6d, 0xa0, 0x0f, 0x00, 0x00,
 }

+ 12 - 0
internal/pb/pb.proto

@@ -165,6 +165,18 @@ message CommitsAnalysisResults {
     repeated string author_index = 2;
 }
 
+message Typo {
+    string wrong = 1;
+    string correct = 2;
+    string commit = 3;
+    string file = 4;
+    int32 line = 5;
+}
+
+message TyposDataset {
+    repeated Typo typos = 1;
+}
+
 message AnalysisResults {
     Metadata header = 1;
     // the mapped values are dynamic messages which require the second parsing pass.

Rozdílová data souboru nebyla zobrazena, protože soubor je příliš velký
+ 112 - 5
internal/pb/pb_pb2.py


+ 59 - 15
internal/plumbing/uast/uast.go

@@ -32,11 +32,12 @@ import (
 // It is a PipelineItem.
 type Extractor struct {
 	core.NoopMerger
-	Endpoint       string
-	Context        func() (context.Context, context.CancelFunc)
-	PoolSize       int
-	FailOnErrors   bool
-	ProcessedFiles map[string]int
+	Endpoint              string
+	Context               func() (context.Context, context.CancelFunc)
+	PoolSize              int
+	FailOnErrors          bool
+	ProcessedFiles        map[string]int
+	IgnoredMissingDrivers map[string]bool
 
 	clients []*bblfsh.Client
 	pool    *tunny.Pool
@@ -45,22 +46,36 @@ type Extractor struct {
 const (
 	// ConfigUASTEndpoint is the name of the configuration option (Extractor.Configure())
 	// which sets the Babelfish server address.
-	ConfigUASTEndpoint = "ConfigUASTEndpoint"
+	ConfigUASTEndpoint = "UAST.Endpoint"
 	// ConfigUASTTimeout is the name of the configuration option (Extractor.Configure())
 	// which sets the maximum amount of time to wait for a Babelfish server response.
-	ConfigUASTTimeout = "ConfigUASTTimeout"
+	ConfigUASTTimeout = "UAST.Timeout"
 	// ConfigUASTPoolSize is the name of the configuration option (Extractor.Configure())
 	// which sets the number of goroutines to run for UAST parse queries.
-	ConfigUASTPoolSize = "ConfigUASTPoolSize"
+	ConfigUASTPoolSize = "UAST.PoolSize"
 	// ConfigUASTFailOnErrors is the name of the configuration option (Extractor.Configure())
 	// which enables early exit in case of any Babelfish UAST parsing errors.
-	ConfigUASTFailOnErrors = "ConfigUASTFailOnErrors"
+	ConfigUASTFailOnErrors = "UAST.FailOnErrors"
+	// ConfigUASTIgnoreMissingDrivers is the name of the configuration option (Extractor.Configure())
+	// which sets the languages for which a missing Babelfish driver is ignored.
+	ConfigUASTIgnoreMissingDrivers = "UAST.IgnoreMissingDrivers"
+	// DefaultBabelfishEndpoint is the default address of the Babelfish parsing server.
+	DefaultBabelfishEndpoint = "0.0.0.0:9432"
+	// DefaultBabelfishTimeout is the default value of the RPC timeout in seconds.
+	DefaultBabelfishTimeout = 20
 	// FeatureUast is the name of the Pipeline feature which activates all the items related to UAST.
 	FeatureUast = "uast"
 	// DependencyUasts is the name of the dependency provided by Extractor.
 	DependencyUasts = "uasts"
 )
 
+var (
+	// DefaultBabelfishWorkers is the default number of parsing RPC goroutines.
+	DefaultBabelfishWorkers = runtime.NumCPU() * 2
+	// DefaultIgnoredMissingDrivers lists the languages which are ignored if the Babelfish driver is missing.
+	DefaultIgnoredMissingDrivers = []string{"markdown", "text", "yaml", "json"}
+)
+
 type uastTask struct {
 	Lock   *sync.RWMutex
 	Dest   map[plumbing.Hash]nodes.Node
@@ -117,22 +132,27 @@ func (exr *Extractor) ListConfigurationOptions() []core.ConfigurationOption {
 		Description: "How many days there are in a single band.",
 		Flag:        "bblfsh",
 		Type:        core.StringConfigurationOption,
-		Default:     "0.0.0.0:9432"}, {
+		Default:     DefaultBabelfishEndpoint}, {
 		Name:        ConfigUASTTimeout,
 		Description: "Babelfish's server timeout in seconds.",
 		Flag:        "bblfsh-timeout",
 		Type:        core.IntConfigurationOption,
-		Default:     20}, {
+		Default:     DefaultBabelfishTimeout}, {
 		Name:        ConfigUASTPoolSize,
 		Description: "Number of goroutines to extract UASTs.",
 		Flag:        "bblfsh-pool-size",
 		Type:        core.IntConfigurationOption,
-		Default:     runtime.NumCPU() * 2}, {
+		Default:     DefaultBabelfishWorkers}, {
 		Name:        ConfigUASTFailOnErrors,
 		Description: "Panic if there is a UAST extraction error.",
 		Flag:        "bblfsh-fail-on-error",
 		Type:        core.BoolConfigurationOption,
-		Default:     false},
+		Default:     false}, {
+		Name:        ConfigUASTIgnoreMissingDrivers,
+		Description: "Do not warn about missing drivers for the specified languages.",
+		Flag:        "bblfsh-ignored-drivers",
+		Type:        core.StringsConfigurationOption,
+		Default:     DefaultIgnoredMissingDrivers},
 	}
 	return options[:]
 }
@@ -154,6 +174,12 @@ func (exr *Extractor) Configure(facts map[string]interface{}) error {
 	if val, exists := facts[ConfigUASTFailOnErrors].(bool); exists {
 		exr.FailOnErrors = val
 	}
+	if val, exists := facts[ConfigUASTIgnoreMissingDrivers].([]string); exists {
+		exr.IgnoredMissingDrivers = map[string]bool{}
+		for _, name := range val {
+			exr.IgnoredMissingDrivers[name] = true
+		}
+	}
 	return nil
 }
 
@@ -162,9 +188,16 @@ func (exr *Extractor) Configure(facts map[string]interface{}) error {
 func (exr *Extractor) Initialize(repository *git.Repository) error {
 	if exr.Context == nil {
 		exr.Context = func() (context.Context, context.CancelFunc) {
-			return context.Background(), nil
+			return context.WithTimeout(context.Background(),
+				time.Duration(DefaultBabelfishTimeout)*time.Second)
 		}
 	}
+	if exr.Endpoint == "" {
+		exr.Endpoint = DefaultBabelfishEndpoint
+	}
+	if exr.PoolSize == 0 {
+		exr.PoolSize = DefaultBabelfishWorkers
+	}
 	poolSize := exr.PoolSize
 	if poolSize == 0 {
 		poolSize = runtime.NumCPU()
@@ -196,6 +229,12 @@ func (exr *Extractor) Initialize(repository *git.Repository) error {
 		panic("UAST goroutine pool was not created")
 	}
 	exr.ProcessedFiles = map[string]int{}
+	if exr.IgnoredMissingDrivers == nil {
+		exr.IgnoredMissingDrivers = map[string]bool{}
+		for _, name := range DefaultIgnoredMissingDrivers {
+			exr.IgnoredMissingDrivers[name] = true
+		}
+	}
 	return nil
 }
 
@@ -250,7 +289,7 @@ func (exr *Extractor) Consume(deps map[string]interface{}) (map[string]interface
 		if exr.FailOnErrors {
 			return nil, errors.New(joined)
 		}
-		fmt.Fprintln(os.Stderr, joined)
+		log.Println(joined)
 	}
 	return map[string]interface{}{DependencyUasts: uasts}, nil
 }
@@ -284,6 +323,11 @@ func (exr *Extractor) extractTask(client *bblfsh.Client, data interface{}) inter
 	task.Lock.Lock()
 	defer task.Lock.Unlock()
 	if err != nil {
+		for lang := range exr.IgnoredMissingDrivers {
+			if strings.HasSuffix(err.Error(), "\""+lang+"\"") {
+				return nil
+			}
+		}
 		*task.Errors = append(*task.Errors,
 			fmt.Errorf("\nfile %s, blob %s: %v", task.Name, task.Hash.String(), err))
 		return nil

+ 33 - 2
internal/plumbing/uast/uast_test.go

@@ -50,11 +50,12 @@ func TestUASTExtractorMeta(t *testing.T) {
 	assert.Equal(t, exr.Requires()[0], items.DependencyTreeChanges)
 	assert.Equal(t, exr.Requires()[1], items.DependencyBlobCache)
 	opts := exr.ListConfigurationOptions()
-	assert.Len(t, opts, 4)
+	assert.Len(t, opts, 5)
 	assert.Equal(t, opts[0].Name, ConfigUASTEndpoint)
 	assert.Equal(t, opts[1].Name, ConfigUASTTimeout)
 	assert.Equal(t, opts[2].Name, ConfigUASTPoolSize)
 	assert.Equal(t, opts[3].Name, ConfigUASTFailOnErrors)
+	assert.Equal(t, opts[4].Name, ConfigUASTIgnoreMissingDrivers)
 	feats := exr.Features()
 	assert.Len(t, feats, 1)
 	assert.Equal(t, feats[0], FeatureUast)
@@ -68,11 +69,13 @@ func TestUASTExtractorConfiguration(t *testing.T) {
 	facts[ConfigUASTTimeout] = 15
 	facts[ConfigUASTPoolSize] = 7
 	facts[ConfigUASTFailOnErrors] = true
+	facts[ConfigUASTIgnoreMissingDrivers] = []string{"test"}
 	exr.Configure(facts)
 	assert.Equal(t, exr.Endpoint, facts[ConfigUASTEndpoint])
 	assert.NotNil(t, exr.Context)
 	assert.Equal(t, exr.PoolSize, facts[ConfigUASTPoolSize])
 	assert.Equal(t, exr.FailOnErrors, true)
+	assert.Equal(t, exr.IgnoredMissingDrivers, map[string]bool{"test": true})
 }
 
 func TestUASTExtractorRegistration(t *testing.T) {
@@ -92,7 +95,7 @@ func TestUASTExtractorNoBabelfish(t *testing.T) {
 
 func TestUASTExtractorConsume(t *testing.T) {
 	exr := fixtureUASTExtractor()
-	changes := make(object.Changes, 3)
+	changes := make(object.Changes, 4)
 	// 2b1ed978194a94edeabbca6de7ff3b5771d4d665
 	treeFrom, _ := test.Repository.TreeObject(plumbing.NewHash(
 		"96c6ece9b2f3c7c51b83516400d278dea5605100"))
@@ -136,6 +139,16 @@ func TestUASTExtractorConsume(t *testing.T) {
 		},
 	},
 	}
+	changes[3] = &object.Change{From: object.ChangeEntry{}, To: object.ChangeEntry{
+		Name: "README.md",
+		Tree: treeTo,
+		TreeEntry: object.TreeEntry{
+			Name: "README.md",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("5248c86995f6d60eb57730da18b5e020a4341863"),
+		},
+	},
+	}
 	cache := map[plumbing.Hash]*items.CachedBlob{}
 	for _, hash := range []string{
 		"baa64828831d174f40140e4b3cfa77d1e917a2c1",
@@ -143,6 +156,7 @@ func TestUASTExtractorConsume(t *testing.T) {
 		"c29112dbd697ad9b401333b80c18a63951bc18d9",
 		"f7d918ec500e2f925ecde79b51cc007bac27de72",
 		"81f2b6d1fa5357f90e9dead150cd515720897545",
+		"5248c86995f6d60eb57730da18b5e020a4341863",
 	} {
 		AddHash(t, cache, hash)
 	}
@@ -158,6 +172,12 @@ func TestUASTExtractorConsume(t *testing.T) {
 	assert.Len(t, res[DependencyUasts], 1)
 	assert.Nil(t, err)
 
+	exr.FailOnErrors = true
+	res, err = exr.Consume(deps)
+	assert.Nil(t, res)
+	assert.NotNil(t, err)
+	exr.FailOnErrors = false
+
 	hash := plumbing.NewHash("5d78f57d732aed825764347ec6f3ab74d50d0619")
 	changes[1] = &object.Change{From: object.ChangeEntry{}, To: object.ChangeEntry{
 		Name: "labours.py",
@@ -176,6 +196,17 @@ func TestUASTExtractorConsume(t *testing.T) {
 	uasts := res[DependencyUasts].(map[plumbing.Hash]nodes.Node)
 	assert.Equal(t, len(uasts), 1)
 	assert.Equal(t, len(uasts[hash].(nodes.Object)["body"].(nodes.Array)), 24)
+
+	exr.IgnoredMissingDrivers = map[string]bool{}
+	changes[2] = changes[3]
+	deps[items.DependencyTreeChanges] = changes[:3]
+	res, err = exr.Consume(deps)
+	assert.Nil(t, err)
+	exr.FailOnErrors = true
+	res, err = exr.Consume(deps)
+	assert.Nil(t, res)
+	assert.NotNil(t, err)
+	exr.FailOnErrors = false
 }
 
 func TestUASTExtractorFork(t *testing.T) {

+ 291 - 0
leaves/research/typos.go

@@ -0,0 +1,291 @@
+package research
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"unicode/utf8"
+
+	"github.com/gogo/protobuf/proto"
+	"github.com/sergi/go-diff/diffmatchpatch"
+	"gopkg.in/bblfsh/sdk.v2/uast"
+	"gopkg.in/bblfsh/sdk.v2/uast/nodes"
+	"gopkg.in/src-d/go-git.v4"
+	"gopkg.in/src-d/go-git.v4/plumbing"
+	"gopkg.in/src-d/go-git.v4/plumbing/object"
+	"gopkg.in/src-d/hercules.v9/internal/core"
+	"gopkg.in/src-d/hercules.v9/internal/levenshtein"
+	"gopkg.in/src-d/hercules.v9/internal/pb"
+	items "gopkg.in/src-d/hercules.v9/internal/plumbing"
+	uast_items "gopkg.in/src-d/hercules.v9/internal/plumbing/uast"
+)
+
+// TyposDatasetBuilder collects pairs of typo-fix in source code identifiers.
+// Consume() accumulates the pairs commit by commit and Finalize() returns them
+// deduplicated as a TyposResult.
+type TyposDatasetBuilder struct {
+	core.NoopMerger
+
+	// MaximumAllowedDistance is the maximum Levenshtein distance between two identifiers
+	// to consider them a typo-fix pair.
+	// Note: Consume() applies this threshold to the whole changed lines which contain
+	// the identifiers, not to the identifiers themselves.
+	MaximumAllowedDistance int
+
+	// typos stores the found typo-fix pairs. It grows in Consume() and may contain
+	// duplicates until Finalize() is called.
+	typos []Typo
+	// lcontext is the Context for measuring Levenshtein distance between lines.
+	// It is allocated in Initialize().
+	lcontext *levenshtein.Context
+	// xpather filters identifiers. Its XPath query is set in Initialize().
+	xpather uast_items.ChangesXPather
+}
+
+// TyposResult is returned by TyposDatasetBuilder.Finalize() and carries the found typo-fix
+// pairs of identifiers.
+type TyposResult struct {
+	// Typos holds one entry per distinct (Wrong, Correct) combination, in the order
+	// in which the pairs were first found.
+	Typos []Typo
+}
+
+// Typo carries the information about a typo-fix pair.
+type Typo struct {
+	// Wrong is the identifier name before the fix.
+	Wrong string
+	// Correct is the identifier name after the fix.
+	Correct string
+	// Commit is the hash of the commit in which the pair was found.
+	Commit plumbing.Hash
+	// File is the name of the changed file (its name after the change).
+	File string
+	// Line is the zero-based index of the fixed line in the new version of the file.
+	Line int
+}
+
+const (
+	// DefaultMaximumAllowedTypoDistance is the default value of the maximum Levenshtein distance
+	// between two identifiers to consider them a typo-fix pair.
+	// TyposDatasetBuilder.Initialize() falls back to it when MaximumAllowedDistance is not positive.
+	DefaultMaximumAllowedTypoDistance = 4
+	// ConfigTyposDatasetMaximumAllowedDistance is the name of the configuration option
+	// (`TyposDatasetBuilder.Configure()`) which sets the maximum Levenshtein distance between
+	// two identifiers to consider them a typo-fix pair.
+	ConfigTyposDatasetMaximumAllowedDistance = "TyposDatasetBuilder.MaximumAllowedDistance"
+)
+
+// Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
+// The returned value is always "TyposDataset".
+func (tdb *TyposDatasetBuilder) Name() string {
+	return "TyposDataset"
+}
+
+// Provides returns the list of names of entities which are produced by this PipelineItem.
+// Each produced entity will be inserted into `deps` of dependent Consume()-s according
+// to this list. Also used by core.Registry to build the global map of providers.
+// This item produces nothing for downstream items, so the list is empty (but not nil).
+func (tdb *TyposDatasetBuilder) Provides() []string {
+	return []string{}
+}
+
+// Requires lists the upstream entities this PipelineItem depends on: the UAST changes,
+// the file diffs and the blob cache. Each of them is looked up in `deps` by Consume(),
+// and each must be listed in the Provides() of some upstream item.
+func (tdb *TyposDatasetBuilder) Requires() []string {
+	return []string{
+		uast_items.DependencyUastChanges,
+		items.DependencyFileDiff,
+		items.DependencyBlobCache,
+	}
+}
+
+// ListConfigurationOptions enumerates the public properties of this PipelineItem which
+// can be changed through Configure(). There is a single one: the maximum allowed
+// Levenshtein distance, exposed on the command line as "typos-max-distance".
+func (tdb *TyposDatasetBuilder) ListConfigurationOptions() []core.ConfigurationOption {
+	maxDistance := core.ConfigurationOption{
+		Name: ConfigTyposDatasetMaximumAllowedDistance,
+		Description: "Maximum Levenshtein distance between two identifiers to consider them " +
+			"a typo-fix pair.",
+		Flag:    "typos-max-distance",
+		Type:    core.IntConfigurationOption,
+		Default: DefaultMaximumAllowedTypoDistance,
+	}
+	return []core.ConfigurationOption{maxDistance}
+}
+
+// Configure applies the properties published by ListConfigurationOptions().
+// MaximumAllowedDistance is overwritten only when the corresponding fact is present
+// and holds an int; otherwise the current value is kept. The returned error is always nil.
+func (tdb *TyposDatasetBuilder) Configure(facts map[string]interface{}) error {
+	raw, present := facts[ConfigTyposDatasetMaximumAllowedDistance]
+	if !present {
+		return nil
+	}
+	if distance, isInt := raw.(int); isInt {
+		tdb.MaximumAllowedDistance = distance
+	}
+	return nil
+}
+
+// Flag for the command line switch which enables this analysis.
+// The returned value is always "typos-dataset".
+func (tdb *TyposDatasetBuilder) Flag() string {
+	return "typos-dataset"
+}
+
+// Description returns the text which explains what the analysis is doing.
+// It is a single human-readable sentence.
+func (tdb *TyposDatasetBuilder) Description() string {
+	return "Extracts typo-fix identifier pairs from source code in commit diffs."
+}
+
+// Initialize prepares this PipelineItem for a series of Consume() calls: it sets the
+// XPath query which selects identifiers, allocates a fresh Levenshtein context and
+// replaces a non-positive MaximumAllowedDistance with DefaultMaximumAllowedTypoDistance.
+// The repository which is going to be analysed is supplied as an argument but is not used.
+// The returned error is always nil.
+func (tdb *TyposDatasetBuilder) Initialize(repository *git.Repository) error {
+	tdb.xpather.XPath = "//uast:Identifier"
+	tdb.lcontext = &levenshtein.Context{}
+	if tdb.MaximumAllowedDistance < 1 {
+		tdb.MaximumAllowedDistance = DefaultMaximumAllowedTypoDistance
+	}
+	return nil
+}
+
+// candidate is a pair of similar lines found by Consume(): a single deleted line
+// immediately followed by a single inserted line.
+type candidate struct {
+	// Before is the zero-based index of the deleted line in the old version of the file.
+	Before int
+	// After is the zero-based index of the inserted line in the new version of the file.
+	After int
+}
+
+// Consume runs this PipelineItem on the next commit data.
+// `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
+// Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
+// This function returns the mapping with analysis results. The keys must be the same as
+// in Provides(). If there was an error, nil is returned.
+//
+// Merge commits are skipped. For every change which has the UASTs of both file versions,
+// the file diff is scanned for a single deleted line immediately followed by a single
+// inserted line. If the Levenshtein distance between the two lines does not exceed
+// MaximumAllowedDistance, the pair becomes a candidate. A candidate is recorded as a Typo
+// when exactly one removed identifier sits on the old line and exactly one added
+// identifier sits on the new line. The found pairs are appended to tdb.typos;
+// the returned values are always (nil, nil).
+func (tdb *TyposDatasetBuilder) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
+	if deps[core.DependencyIsMerge].(bool) {
+		return nil, nil
+	}
+	commit := deps[core.DependencyCommit].(*object.Commit).Hash
+	cache := deps[items.DependencyBlobCache].(map[plumbing.Hash]*items.CachedBlob)
+	diffs := deps[items.DependencyFileDiff].(map[string]items.FileDiffData)
+	changes := deps[uast_items.DependencyUastChanges].([]uast_items.Change)
+	for _, change := range changes {
+		if change.Before == nil || change.After == nil {
+			// the UASTs of both versions are needed to compare identifiers
+			continue
+		}
+		linesBefore := bytes.Split(cache[change.Change.From.TreeEntry.Hash].Data, []byte{'\n'})
+		linesAfter := bytes.Split(cache[change.Change.To.TreeEntry.Hash].Data, []byte{'\n'})
+		diff := diffs[change.Change.To.Name]
+		// lineNumBefore and lineNumAfter count the lines consumed so far in the old and
+		// the new version of the file
+		var lineNumBefore, lineNumAfter int
+		// singleLineDeleted is true when the previous edit deleted exactly one line
+		singleLineDeleted := false
+		var candidates []candidate
+		focusedLinesBefore := map[int]bool{}
+		focusedLinesAfter := map[int]bool{}
+		for _, edit := range diff.Diffs {
+			// the number of runes is used as the number of lines covered by the edit
+			// NOTE(review): presumably the diff is line-encoded, one rune per line - confirm
+			size := utf8.RuneCountInString(edit.Text)
+			switch edit.Type {
+			case diffmatchpatch.DiffDelete:
+				lineNumBefore += size
+				singleLineDeleted = size == 1
+			case diffmatchpatch.DiffInsert:
+				if size == 1 && singleLineDeleted {
+					// the deleted line has already been counted, hence the -1
+					dist := tdb.lcontext.Distance(
+						string(linesBefore[lineNumBefore-1]),
+						string(linesAfter[lineNumAfter]))
+					if dist <= tdb.MaximumAllowedDistance {
+						candidates = append(candidates, candidate{lineNumBefore - 1, lineNumAfter})
+						focusedLinesBefore[lineNumBefore-1] = true
+						focusedLinesAfter[lineNumAfter] = true
+					}
+				}
+				lineNumAfter += size
+				singleLineDeleted = false
+			case diffmatchpatch.DiffEqual:
+				lineNumBefore += size
+				lineNumAfter += size
+				singleLineDeleted = false
+			}
+		}
+		if len(candidates) == 0 {
+			continue
+		}
+		// at this point we have pairs of very similar lines
+		// we need to build the line mappings of the identifiers before/after the change
+		// we should keep only those which are present on those focused lines
+		nodesAdded, nodesRemoved := tdb.xpather.Extract([]uast_items.Change{change})
+		addedIdentifiers := map[int][]nodes.Node{}
+		removedIdentifiers := map[int][]nodes.Node{}
+		for _, n := range nodesAdded {
+			pos := uast.PositionsOf(n.(nodes.Object))
+			if pos.Start() != nil {
+				// UAST lines are one-based, our indices are zero-based
+				line := int(pos.Start().Line) - 1
+				if focusedLinesAfter[line] {
+					addedIdentifiers[line] = append(addedIdentifiers[line], n)
+				}
+			}
+		}
+		for _, n := range nodesRemoved {
+			pos := uast.PositionsOf(n.(nodes.Object))
+			// the start position must be checked before it is dereferenced
+			if pos.Start() != nil {
+				line := int(pos.Start().Line) - 1
+				if focusedLinesBefore[line] {
+					removedIdentifiers[line] = append(removedIdentifiers[line], n)
+				}
+			}
+		}
+		for _, c := range candidates {
+			// removed identifiers are indexed by the lines of the old version,
+			// added identifiers are indexed by the lines of the new version
+			nodesBefore := removedIdentifiers[c.Before]
+			nodesAfter := addedIdentifiers[c.After]
+			if len(nodesBefore) == 1 && len(nodesAfter) == 1 {
+				idBefore := string(nodesBefore[0].(nodes.Object)["Name"].(nodes.String))
+				idAfter := string(nodesAfter[0].(nodes.Object)["Name"].(nodes.String))
+				tdb.typos = append(tdb.typos, Typo{
+					Wrong:   idBefore,
+					Correct: idAfter,
+					Commit:  commit,
+					File:    change.Change.To.Name,
+					Line:    c.After,
+				})
+			}
+		}
+	}
+	return nil, nil
+}
+
+// Finalize returns the result of the analysis. Further Consume() calls are not expected.
+// The collected pairs are deduplicated by their (Wrong, Correct) names: only the first
+// occurrence of each combination is kept, and the order of appearance is preserved.
+func (tdb *TyposDatasetBuilder) Finalize() interface{} {
+	seen := map[string]bool{}
+	unique := make([]Typo, 0, len(tdb.typos))
+	for _, typo := range tdb.typos {
+		key := typo.Wrong + "|" + typo.Correct
+		if seen[key] {
+			continue
+		}
+		seen[key] = true
+		unique = append(unique, typo)
+	}
+	return TyposResult{Typos: unique}
+}
+
+// Fork clones this pipeline item.
+// It delegates to core.ForkSamePipelineItem with this instance and the requested count `n`.
+// NOTE(review): presumably all the forks refer to the same instance - confirm in core.
+func (tdb *TyposDatasetBuilder) Fork(n int) []core.PipelineItem {
+	return core.ForkSamePipelineItem(tdb, n)
+}
+
+// Serialize converts the analysis result as returned by Finalize() to text or bytes.
+// The text format is YAML and the bytes format is Protocol Buffers.
+// `result` must be a TyposResult, otherwise the type assertion panics.
+// The returned error is the first failure reported by `writer` or by the
+// Protocol Buffers marshalling.
+func (tdb *TyposDatasetBuilder) Serialize(result interface{}, binary bool, writer io.Writer) error {
+	typosResult := result.(TyposResult)
+	if binary {
+		return tdb.serializeBinary(&typosResult, writer)
+	}
+	return tdb.serializeText(&typosResult, writer)
+}
+
+// serializeText writes each typo-fix pair as an item of a YAML list with the keys
+// "wrong", "correct", "commit", "file" and "line". It stops at the first write error
+// and returns it, so that a failing writer is not silently ignored.
+func (tdb *TyposDatasetBuilder) serializeText(result *TyposResult, writer io.Writer) error {
+	for _, t := range result.Typos {
+		_, err := fmt.Fprintf(writer,
+			"  - wrong: %s\n    correct: %s\n    commit: %s\n    file: %s\n    line: %d\n",
+			t.Wrong, t.Correct, t.Commit.String(), t.File, t.Line)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// serializeBinary converts every typo-fix pair to its Protocol Buffers counterpart
+// (the commit hash becomes its string form, the line becomes int32), marshals the
+// resulting pb.TyposDataset and writes the bytes to `writer`.
+// The marshalling or the write error is returned.
+func (tdb *TyposDatasetBuilder) serializeBinary(result *TyposResult, writer io.Writer) error {
+	converted := make([]*pb.Typo, 0, len(result.Typos))
+	for _, typo := range result.Typos {
+		converted = append(converted, &pb.Typo{
+			Wrong:   typo.Wrong,
+			Correct: typo.Correct,
+			Commit:  typo.Commit.String(),
+			File:    typo.File,
+			Line:    int32(typo.Line),
+		})
+	}
+	payload, err := proto.Marshal(&pb.TyposDataset{Typos: converted})
+	if err != nil {
+		return err
+	}
+	_, err = writer.Write(payload)
+	return err
+}
+
+// init registers TyposDatasetBuilder in the global core.Registry when the package is imported.
+func init() {
+	core.Registry.Register(&TyposDatasetBuilder{})
+}

+ 210 - 0
leaves/research/typos_test.go

@@ -0,0 +1,210 @@
+// +build !disable_babelfish
+
+package research
+
+import (
+	"bytes"
+	"testing"
+
+	"github.com/gogo/protobuf/proto"
+	"github.com/stretchr/testify/assert"
+	"gopkg.in/src-d/go-git.v4/plumbing"
+	"gopkg.in/src-d/go-git.v4/plumbing/object"
+	"gopkg.in/src-d/hercules.v9/internal/core"
+	"gopkg.in/src-d/hercules.v9/internal/pb"
+	items "gopkg.in/src-d/hercules.v9/internal/plumbing"
+	uast_items "gopkg.in/src-d/hercules.v9/internal/plumbing/uast"
+	"gopkg.in/src-d/hercules.v9/internal/test"
+)
+
+// TestTyposDatasetMeta checks the static metadata of TyposDatasetBuilder: its name,
+// provided and required entities, configuration options and command line flag.
+func TestTyposDatasetMeta(t *testing.T) {
+	tdb := TyposDatasetBuilder{}
+	assert.Equal(t, tdb.Name(), "TyposDataset")
+	assert.Len(t, tdb.Provides(), 0)
+	for _, dependency := range []string{
+		uast_items.DependencyUastChanges,
+		items.DependencyFileDiff,
+		items.DependencyBlobCache,
+	} {
+		assert.Contains(t, tdb.Requires(), dependency)
+	}
+	options := tdb.ListConfigurationOptions()
+	assert.Len(t, options, 1)
+	assert.Equal(t, options[0].Name, ConfigTyposDatasetMaximumAllowedDistance)
+	assert.Equal(t, options[0].Type, core.IntConfigurationOption)
+	assert.Equal(t, tdb.Flag(), "typos-dataset")
+}
+
+// TestTyposDatasetRegistration checks that the item can be summoned from core.Registry
+// by its name and that it is listed among the leaves by its flag.
+func TestTyposDatasetRegistration(t *testing.T) {
+	prototype := &TyposDatasetBuilder{}
+	summoned := core.Registry.Summon(prototype.Name())
+	assert.Len(t, summoned, 1)
+	assert.Equal(t, summoned[0].Name(), "TyposDataset")
+	found := false
+	for _, leaf := range core.Registry.GetLeaves() {
+		if leaf.Flag() == prototype.Flag() {
+			found = true
+			break
+		}
+	}
+	assert.True(t, found)
+}
+
+// TestTyposDatasetConfigure checks that Configure() picks up the maximum distance fact
+// and that the absence of the fact leaves the previous value intact.
+func TestTyposDatasetConfigure(t *testing.T) {
+	tdb := TyposDatasetBuilder{}
+	withDistance := map[string]interface{}{
+		ConfigTyposDatasetMaximumAllowedDistance: 5,
+	}
+	assert.Nil(t, tdb.Configure(withDistance))
+	assert.Equal(t, tdb.MaximumAllowedDistance, 5)
+	// an empty set of facts must not reset the value
+	assert.Nil(t, tdb.Configure(map[string]interface{}{}))
+	assert.Equal(t, tdb.MaximumAllowedDistance, 5)
+}
+
+// AddHash loads the blob with the given hex hash from the test repository, caches its
+// contents and stores the result in `cache` under the parsed hash.
+func AddHash(t *testing.T, cache map[plumbing.Hash]*items.CachedBlob, hash string) {
+	key := plumbing.NewHash(hash)
+	blob, err := test.Repository.BlobObject(key)
+	assert.Nil(t, err)
+	cached := &items.CachedBlob{Blob: *blob}
+	assert.Nil(t, cached.Cache())
+	cache[key] = cached
+}
+
+// TestTyposDatasetConsume runs the upstream items (UAST extractor, UAST changes, file diff)
+// on two blobs of file_test.go from the test repository, feeds their results to
+// TyposDatasetBuilder.Consume() and checks the collected typo-fix pairs.
+func TestTyposDatasetConsume(t *testing.T) {
+	deps := map[string]interface{}{}
+	// cache the blobs which are referenced by the changes below
+	cache := map[plumbing.Hash]*items.CachedBlob{}
+	AddHash(t, cache, "b9a12fd144274c99c7c9a0a32a0268f8b36d2f2c")
+	AddHash(t, cache, "d5f8e61069136f3578457a3131800ede353527b8")
+	AddHash(t, cache, "75bb0a09fc01db55d7322f0fae523453edba7846")
+	deps[items.DependencyBlobCache] = cache
+	changes := make(object.Changes, 2)
+	treeFrom, _ := test.Repository.TreeObject(plumbing.NewHash(
+		"eac25f9126db00e38fa72a59d49773a84580d4ce"))
+	treeTo, _ := test.Repository.TreeObject(plumbing.NewHash(
+		"828467b465864b1f757dcec9a034be49030fc8b9"))
+	// changes[0] initially points to the same blob on both sides;
+	// the To hash is replaced before the second pass below
+	changes[0] = &object.Change{From: object.ChangeEntry{
+		Name: "file_test.go",
+		Tree: treeFrom,
+		TreeEntry: object.TreeEntry{
+			Name: "file_test.go",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("75bb0a09fc01db55d7322f0fae523453edba7846"),
+		},
+	}, To: object.ChangeEntry{
+		Name: "file_test.go",
+		Tree: treeTo,
+		TreeEntry: object.TreeEntry{
+			Name: "file_test.go",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("75bb0a09fc01db55d7322f0fae523453edba7846"),
+		},
+	}}
+	// changes[1] is an insertion: the From entry is empty
+	changes[1] = &object.Change{From: object.ChangeEntry{}, To: object.ChangeEntry{
+		Name: "blob_cache_test.go",
+		Tree: treeTo,
+		TreeEntry: object.TreeEntry{
+			Name: "blob_cache_test.go",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("b9a12fd144274c99c7c9a0a32a0268f8b36d2f2c"),
+		},
+	},
+	}
+	deps[items.DependencyTreeChanges] = changes
+	deps[core.DependencyCommit], _ = test.Repository.CommitObject(plumbing.NewHash(
+		"84165d3b02647fae12cc026c7a580045246e8c98"))
+	deps[core.DependencyIsMerge] = false
+	// first pass over the upstream items; the result of uastChanges is discarded
+	// NOTE(review): presumably this primes uastChanges with the UAST of the old blob - confirm
+	uastItem := &uast_items.Extractor{}
+	assert.Nil(t, uastItem.Initialize(test.Repository))
+	uastResult, err := uastItem.Consume(deps)
+	assert.Nil(t, err)
+	deps[uast_items.DependencyUasts] = uastResult[uast_items.DependencyUasts]
+	uastChanges := &uast_items.Changes{}
+	assert.Nil(t, uastChanges.Initialize(test.Repository))
+	_, err = uastChanges.Consume(deps)
+	assert.Nil(t, err)
+	// second pass: file_test.go now changes from blob 75bb0a09 to blob d5f8e610
+	changes[0].To.TreeEntry.Hash = plumbing.NewHash("d5f8e61069136f3578457a3131800ede353527b8")
+	uastResult, err = uastItem.Consume(deps)
+	assert.Nil(t, err)
+	deps[uast_items.DependencyUasts] = uastResult[uast_items.DependencyUasts]
+	changesResult, err := uastChanges.Consume(deps)
+	assert.Nil(t, err)
+	deps[uast_items.DependencyUastChanges] = changesResult[uast_items.DependencyUastChanges]
+	fd := &items.FileDiff{}
+	assert.Nil(t, fd.Initialize(test.Repository))
+	diffResult, err := fd.Consume(deps)
+	assert.Nil(t, err)
+	deps[items.DependencyFileDiff] = diffResult[items.DependencyFileDiff]
+
+	// the item under test: Consume() returns nothing and stores the pairs in tbd.typos
+	tbd := &TyposDatasetBuilder{}
+	assert.Nil(t, tbd.Initialize(test.Repository))
+	res, err := tbd.Consume(deps)
+	assert.Nil(t, res)
+	assert.Nil(t, err)
+	assert.Len(t, tbd.typos, 4)
+	assert.Equal(t, tbd.typos[0].Wrong, "TestZeroInitializeFile")
+	assert.Equal(t, tbd.typos[0].Correct, "TestZeroInitialize")
+	assert.Equal(t, tbd.typos[0].Commit, plumbing.NewHash(
+		"84165d3b02647fae12cc026c7a580045246e8c98"))
+	assert.Equal(t, tbd.typos[0].File, "file_test.go")
+	assert.Equal(t, tbd.typos[0].Line, 74)
+
+	// merge commits must be skipped: the number of collected pairs stays the same
+	deps[core.DependencyIsMerge] = true
+	res, err = tbd.Consume(deps)
+	assert.Nil(t, res)
+	assert.Nil(t, err)
+	assert.Len(t, tbd.typos, 4)
+}
+
+// fixtureTyposDataset returns an initialized TyposDatasetBuilder which already holds
+// a single typo-fix pair ("Fo" -> "Foo" in bar.go, line 7, zero commit hash).
+func fixtureTyposDataset() *TyposDatasetBuilder {
+	tdb := &TyposDatasetBuilder{}
+	tdb.Initialize(test.Repository)
+	tdb.typos = []Typo{{
+		Wrong:   "Fo",
+		Correct: "Foo",
+		Commit:  plumbing.ZeroHash,
+		File:    "bar.go",
+		Line:    7,
+	}}
+	return tdb
+}
+
+// TestTyposDatasetFinalize checks that Finalize() removes duplicate typo-fix pairs.
+func TestTyposDatasetFinalize(t *testing.T) {
+	tdb := fixtureTyposDataset()
+	// add a copy of the only pair so that there is something to deduplicate
+	duplicate := tdb.typos[0]
+	tdb.typos = append(tdb.typos, duplicate)
+	result := tdb.Finalize().(TyposResult)
+	assert.Len(t, result.Typos, 1)
+	assert.Equal(t, result.Typos[0], Typo{
+		Wrong:   "Fo",
+		Correct: "Foo",
+		Commit:  plumbing.ZeroHash,
+		File:    "bar.go",
+		Line:    7,
+	})
+}
+
+// TestTyposDatasetSerialize checks both output formats of Serialize(): the YAML text
+// and the Protocol Buffers message.
+func TestTyposDatasetSerialize(t *testing.T) {
+	tdb := fixtureTyposDataset()
+	result := tdb.Finalize().(TyposResult)
+	text := &bytes.Buffer{}
+	assert.Nil(t, tdb.Serialize(result, false, text))
+	assert.Equal(t, `  - wrong: Fo
    correct: Foo
    commit: 0000000000000000000000000000000000000000
    file: bar.go
    line: 7
`, text.String())
+
+	binary := &bytes.Buffer{}
+	assert.Nil(t, tdb.Serialize(result, true, binary))
+	decoded := pb.TyposDataset{}
+	assert.Nil(t, proto.Unmarshal(binary.Bytes(), &decoded))
+	assert.Len(t, decoded.Typos, 1)
+	assert.Equal(t, *decoded.Typos[0], pb.Typo{
+		Wrong:   "Fo",
+		Correct: "Foo",
+		Commit:  "0000000000000000000000000000000000000000",
+		File:    "bar.go",
+		Line:    7,
+	})
+}