Browse Source

Add UAST parser

Vadim Markovtsev 7 years ago
parent
commit
1a8036dc44
5 changed files with 224 additions and 9 deletions
  1. 3 0
      .gitignore
  2. 13 0
      .travis.yml
  3. 11 9
      pipeline_test.go
  4. 94 0
      uast.go
  5. 103 0
      uast_test.go

+ 3 - 0
.gitignore

@@ -1,3 +1,6 @@
+**/.DS_Store
+.idea
+
 # Compiled Object files, Static and Dynamic libs (Shared Objects)
 *.o
 *.a

+ 13 - 0
.travis.yml

@@ -2,6 +2,15 @@ dist: trusty
 
 language: go
 
+services:
+  - docker
+
+addons:
+  apt:
+    packages:
+    - libxml2-dev
+    - libcunit1-dev
+
 go:
   - 1.8
   - 1.9
@@ -13,6 +22,10 @@ before_install:
   - export PATH=$PATH:~/.local/bin
   - pip3 install --user -r requirements.txt
   - pip3 install --user tensorflow
+  - docker run -d --privileged -p 9432:9432 --name bblfsh bblfsh/server
+  - git clone https://github.com/bblfsh/libuast
+  - cd libuast && cmake -DCMAKE_BUILD_TYPE=Release . && make && ln -s src libuast && cd ..
+  - export CGO_CFLAGS="-I$(pwd)/libuast" && export CGO_LDFLAGS="-L$(pwd)/libuast/lib -Wl,-rpath -Wl,$(pwd)/libuast/lib"
   
 script:
   - go test -v -cpu=1,2 -coverprofile=coverage.txt -covermode=count gopkg.in/src-d/hercules.v2

+ 11 - 9
pipeline_test.go

@@ -223,15 +223,17 @@ func init() {
 	if err == nil {
 		testRepository, err = git.PlainOpen(cwd)
 		if err == nil {
-			iter, _ := testRepository.CommitObjects()
-			commits := -1
-			for ; err != io.EOF; _, err = iter.Next() {
-				if err != nil {
-					panic(err)
-				}
-				commits++
-				if commits >= 100 {
-					return
+			iter, err := testRepository.CommitObjects()
+			if err == nil {
+				commits := -1
+				for ; err != io.EOF; _, err = iter.Next() {
+					if err != nil {
+						panic(err)
+					}
+					commits++
+					if commits >= 100 {
+						return
+					}
 				}
 			}
 		}

+ 94 - 0
uast.go

@@ -0,0 +1,94 @@
+package hercules
+
+import (
+	"context"
+
+	"gopkg.in/src-d/go-git.v4"
+	"gopkg.in/src-d/go-git.v4/plumbing"
+	"gopkg.in/src-d/go-git.v4/plumbing/object"
+	"gopkg.in/src-d/go-git.v4/utils/merkletrie"
+	"gopkg.in/bblfsh/client-go.v0"
+	"github.com/bblfsh/sdk/uast"
+	"github.com/bblfsh/sdk/protocol"
+	"errors"
+	"strings"
+)
+
+// UASTExtractor turns the files touched by a set of tree changes into UASTs
+// by sending their contents to a Babelfish (bblfsh) server.
+type UASTExtractor struct {
+	// Endpoint is the server address handed to bblfsh.NewBblfshClient
+	// by Initialize.
+	Endpoint string
+	// Context produces the context used for each parse request. Initialize
+	// installs a factory returning context.Background() when it is nil.
+	Context func() context.Context
+	// client is the bblfsh connection created by Initialize.
+	client *bblfsh.BblfshClient
+}
+
+func (exr *UASTExtractor) Name() string {
+	return "UAST"
+}
+
+// Provides lists the single result key, "uasts", under which Consume
+// publishes its output. A fresh slice is returned on every call.
+func (exr *UASTExtractor) Provides() []string {
+	return []string{"uasts"}
+}
+
+// Requires lists the dependency keys Consume reads from its input map:
+// "changes" and "blob_cache", in that order. A fresh slice is returned on
+// every call.
+func (exr *UASTExtractor) Requires() []string {
+	return []string{"changes", "blob_cache"}
+}
+
+// Initialize connects to the bblfsh server at exr.Endpoint and stores the
+// resulting client. It panics if the client cannot be created. When no
+// Context factory was configured, context.Background is installed as the
+// default. The repository argument is not used by this method.
+func (exr *UASTExtractor) Initialize(repository *git.Repository) {
+	bblfshClient, connErr := bblfsh.NewBblfshClient(exr.Endpoint)
+	if connErr != nil {
+		panic(connErr)
+	}
+	exr.client = bblfshClient
+	if exr.Context != nil {
+		// Keep the caller-supplied context factory.
+		return
+	}
+	exr.Context = context.Background
+}
+
+// Consume parses every file inserted or modified by the current set of tree
+// changes and returns the resulting UASTs.
+//
+// deps must contain:
+//   - "blob_cache": map[plumbing.Hash]*object.Blob with the blobs of the
+//     changed files;
+//   - "changes": object.Changes describing the tree diff.
+//
+// The type assertions on these entries are unchecked and panic if an entry is
+// missing or has a different type.
+//
+// On success the returned map has a single key, "uasts", holding a
+// map[string]*uast.Node keyed by the new file name. Deleted files are skipped.
+// The first failure (malformed change, blob absent from the cache, or parse
+// error) aborts processing and is returned with a nil result.
+func (exr *UASTExtractor) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
+	cache := deps["blob_cache"].(map[plumbing.Hash]*object.Blob)
+	treeDiffs := deps["changes"].(object.Changes)
+	uasts := map[string]*uast.Node{}
+	for _, change := range treeDiffs {
+		action, err := change.Action()
+		if err != nil {
+			return nil, err
+		}
+		if action != merkletrie.Insert && action != merkletrie.Modify {
+			// Deletions carry no new content to parse.
+			continue
+		}
+		name := change.To.Name
+		hash := change.To.TreeEntry.Hash
+		// Covers both a missing key and a nil entry; dereferencing either
+		// would panic.
+		blob := cache[hash]
+		if blob == nil {
+			return nil, errors.New(
+				"blob " + hash.String() + " of " + name + " is missing from the blob cache")
+		}
+		node, err := exr.extractUAST(&object.File{Name: name, Blob: *blob})
+		if err != nil {
+			return nil, err
+		}
+		uasts[name] = node
+	}
+	return map[string]interface{}{"uasts": uasts}, nil
+}
+
+func (exr *UASTExtractor) Finalize() interface{} {
+	return nil
+}
+
+// extractUAST sends the contents of file to the bblfsh server and returns the
+// root node of the parsed UAST.
+//
+// An error is returned when the file contents cannot be read, when the request
+// itself fails, or when the server answers with a status other than
+// protocol.Ok; in the last case the server-reported errors are joined with
+// newlines into the error message.
+func (exr *UASTExtractor) extractUAST(file *object.File) (*uast.Node, error) {
+	request := exr.client.NewParseRequest()
+	contents, err := file.Contents()
+	if err != nil {
+		return nil, err
+	}
+	request.Content(contents)
+	request.Filename(file.Name)
+	response, err := request.DoWithContext(exr.Context())
+	// The request error must be checked before touching response: on failure
+	// the response is nil and reading response.Status would panic.
+	if err != nil {
+		return nil, err
+	}
+	if response.Status != protocol.Ok {
+		return nil, errors.New(strings.Join(response.Errors, "\n"))
+	}
+	return response.UAST, nil
+}

+ 103 - 0
uast_test.go

@@ -0,0 +1,103 @@
+package hercules
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"gopkg.in/src-d/go-git.v4/plumbing/object"
+	"gopkg.in/src-d/go-git.v4/plumbing"
+	"github.com/bblfsh/sdk/uast"
+)
+
+// fixtureUASTExtractor builds a UASTExtractor pointed at the bblfsh server on
+// 0.0.0.0:9432 and initializes it against the shared test repository.
+func fixtureUASTExtractor() *UASTExtractor {
+	extractor := &UASTExtractor{Endpoint: "0.0.0.0:9432"}
+	extractor.Initialize(testRepository)
+	return extractor
+}
+
+// TestUASTExtractorMeta checks the static metadata of the extractor: its name
+// and the keys it provides and requires.
+func TestUASTExtractorMeta(t *testing.T) {
+	exr := fixtureUASTExtractor()
+	// assert.Equal takes (expected, actual); comparing whole slices also
+	// avoids indexing into a slice of unexpected length.
+	assert.Equal(t, "UAST", exr.Name())
+	assert.Equal(t, []string{"uasts"}, exr.Provides())
+	assert.Equal(t, []string{"changes", "blob_cache"}, exr.Requires())
+}
+
+// TestUASTExtractorFinalize verifies that Finalize yields a nil result.
+func TestUASTExtractorFinalize(t *testing.T) {
+	extractor := fixtureUASTExtractor()
+	assert.Nil(t, extractor.Finalize())
+}
+
+// TestUASTExtractorConsume runs Consume twice against fixture objects of the
+// test repository: first with a deleted file plus a modified Go file, which is
+// expected to fail, then with the Go change replaced by an inserted Python
+// file, which is expected to yield exactly one UAST.
+func TestUASTExtractorConsume(t *testing.T) {
+	exr := fixtureUASTExtractor()
+	changes := make(object.Changes, 2)
+	// 2b1ed978194a94edeabbca6de7ff3b5771d4d665 (presumably the commit the
+	// fixture trees belong to; verify against the repository history).
+	// NOTE(review): lookup errors for the fixture trees and blobs are ignored
+	// here, exactly as before; consider asserting on them once the hashes are
+	// confirmed to exist in every checkout.
+	treeFrom, _ := testRepository.TreeObject(plumbing.NewHash(
+		"96c6ece9b2f3c7c51b83516400d278dea5605100"))
+	treeTo, _ := testRepository.TreeObject(plumbing.NewHash(
+		"251f2094d7b523d5bcc60e663b6cf38151bf8844"))
+	// Deletion: only the From side is populated.
+	changes[0] = &object.Change{From: object.ChangeEntry{
+		Name: "analyser.go",
+		Tree: treeFrom,
+		TreeEntry: object.TreeEntry{
+			Name: "analyser.go",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("baa64828831d174f40140e4b3cfa77d1e917a2c1"),
+		},
+	}, To: object.ChangeEntry{},
+	}
+	// Modification: both sides are populated.
+	changes[1] = &object.Change{From: object.ChangeEntry{
+		Name: "cmd/hercules/main.go",
+		Tree: treeFrom,
+		TreeEntry: object.TreeEntry{
+			Name: "cmd/hercules/main.go",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("c29112dbd697ad9b401333b80c18a63951bc18d9"),
+		},
+	}, To: object.ChangeEntry{
+		Name: "cmd/hercules/main.go",
+		Tree: treeTo,
+		TreeEntry: object.TreeEntry{
+			Name: "cmd/hercules/main.go",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("f7d918ec500e2f925ecde79b51cc007bac27de72"),
+		},
+	},
+	}
+	cache := map[plumbing.Hash]*object.Blob{}
+	for _, hex := range []string{
+		"baa64828831d174f40140e4b3cfa77d1e917a2c1",
+		"5d78f57d732aed825764347ec6f3ab74d50d0619",
+		"c29112dbd697ad9b401333b80c18a63951bc18d9",
+		"f7d918ec500e2f925ecde79b51cc007bac27de72",
+	} {
+		hash := plumbing.NewHash(hex)
+		cache[hash], _ = testRepository.BlobObject(hash)
+	}
+	deps := map[string]interface{}{}
+	deps["blob_cache"] = cache
+	deps["changes"] = changes
+	res, err := exr.Consume(deps)
+	// No Go driver
+	assert.Nil(t, res)
+	assert.NotNil(t, err)
+
+	// Insertion: only the To side is populated.
+	changes[1] = &object.Change{From: object.ChangeEntry{}, To: object.ChangeEntry{
+		Name: "labours.py",
+		Tree: treeTo,
+		TreeEntry: object.TreeEntry{
+			Name: "labours.py",
+			Mode: 0100644,
+			Hash: plumbing.NewHash("5d78f57d732aed825764347ec6f3ab74d50d0619"),
+		},
+	},
+	}
+
+	res, err = exr.Consume(deps)
+	// assert.* is non-fatal, so stop explicitly instead of panicking on a nil
+	// result below.
+	if !assert.Nil(t, err) {
+		return
+	}
+	uasts, ok := res["uasts"].(map[string]*uast.Node)
+	if !assert.True(t, ok) {
+		return
+	}
+	assert.Equal(t, 1, len(uasts))
+	node := uasts["labours.py"]
+	if assert.NotNil(t, node) {
+		assert.Equal(t, 24, len(node.Children))
+	}
+}