Bläddra i källkod

Merge pull request #148 from vmarkovtsev/master

LZ4 hibernation
Vadim Markovtsev 6 år sedan
förälder
incheckning
2f32ae1562

+ 4 - 4
.travis.yml

@@ -7,8 +7,8 @@ git:
 language: go
 go_import_path: gopkg.in/src-d/hercules.v6
 go:
-- 1.10.x
-- 1.11.x
+  - 1.10.x
+  - 1.11.x
 
 services:
   - docker
@@ -65,7 +65,7 @@ install:
   - make TAGS=tensorflow
 script:
   - set -e
-  - if [ $TRAVIS_GO_VERSION = "1.11.*" ]; then test -z "$(gofmt -s -l . | grep -v vendor/)"; fi
+  - if [ $TRAVIS_GO_VERSION == "1.11.x" ]; then test -z "$(gofmt -s -l . | grep -v vendor/)"; fi
   - go vet -tags tensorflow ./...
   - golint -set_exit_status $(go list ./... | grep -v /vendor/)
   - flake8
@@ -80,7 +80,7 @@ script:
   - $GOPATH/bin/hercules --burndown --burndown-files --burndown-people --couples --devs --quiet --pb https://github.com/src-d/hercules | python3 labours.py -f pb -m all -o out --backend Agg --disable-projector
   - # $GOPATH/bin/hercules --sentiment --quiet --languages Python https://github.com/src-d/hercules > /dev/null
   - set +e
-  - if [ $TRAVIS_GO_VERSION = "1.11.*" ]; then bash <(curl -s https://codecov.io/bash); fi
+  - if [ $TRAVIS_GO_VERSION == "1.11.x" ]; then bash <(curl -s https://codecov.io/bash); fi
 
 jobs:
   include:

+ 2 - 2
internal/burndown/file.go

@@ -257,11 +257,11 @@ func (file *File) Update(time int, pos int, insLength int, delLength int) {
 	if delta != 0 {
 		for iter = iter.Next(); !iter.Limit(); iter = iter.Next() {
 			// we do not need to re-balance the tree
-			iter.Item().Key = uint32(int(iter.Item().Key)+delta)
+			iter.Item().Key = uint32(int(iter.Item().Key) + delta)
 		}
 		// have to adjust origin in case insLength == 0
 		if origin.Key > uint32(pos) {
-			origin.Key = uint32(int(origin.Key)+delta)
+			origin.Key = uint32(int(origin.Key) + delta)
 		}
 	}
 

+ 6 - 6
internal/burndown/file_test.go

@@ -56,7 +56,7 @@ func TestBullshitFile(t *testing.T) {
 	file.Update(1, 10, 0, 0)
 	assert.Equal(t, int64(100), status[0])
 	assert.Equal(t, int64(0), status[1])
-	assert.Equal(t, alloc.Size(), 3)  // 1 + 2 nodes
+	assert.Equal(t, alloc.Size(), 3) // 1 + 2 nodes
 }
 
 func TestCloneFileShallow(t *testing.T) {
@@ -212,8 +212,8 @@ func TestFusedFile(t *testing.T) {
 	dump = file.Dump()
 	assert.Equal(t, "0 0\n93 -1\n", dump)
 	assert.Equal(t, alloc.Size(), 5)
-	file.Update(3, 10, 6, 0) // +2 nodes
-	assert.Equal(t, alloc.Size(), 5)  // using gaps
+	file.Update(3, 10, 6, 0)         // +2 nodes
+	assert.Equal(t, alloc.Size(), 5) // using gaps
 	file.Update(4, 10, 6, 0)
 	assert.Equal(t, alloc.Size(), 6)
 }
@@ -502,7 +502,7 @@ func TestBug4File(t *testing.T) {
 	// 0 125 | 2 215 | 27 214 | 28 300 | 29 214 | 30 215 | 37 214 | 40 215 | 44 125 | 46 215 | 48 125 | 49 215 | 69 125 | 73 215 | 79 125 | 80 0 | 81 -1
 	dump = file.Dump()
 	assert.Equal(t, "0 125\n2 215\n27 214\n28 300\n29 214\n30 215\n37 214\n40 215\n44 125\n46 215\n48 125\n49 215\n69 125\n73 215\n79 125\n80 0\n81 -1\n", dump)
-	assert.Equal(t, 1 + file.tree.Len(), alloc.Used())
+	assert.Equal(t, 1+file.tree.Len(), alloc.Used())
 	assert.Equal(t, file.Nodes(), file.tree.Len())
 }
 
@@ -539,10 +539,10 @@ func TestUpdatePanic(t *testing.T) {
 	file := NewFileFromTree(keys[:], vals[:], rbtree.NewAllocator())
 	file.tree.DeleteWithKey(0)
 	file.tree.Insert(rbtree.Item{Key: 1, Value: math.MaxUint32})
-	assert.PanicsWithValue(t, "invalid tree state", func(){file.Update(1, 0, 1, 0)})
+	assert.PanicsWithValue(t, "invalid tree state", func() { file.Update(1, 0, 1, 0) })
 	file.tree.Insert(rbtree.Item{Key: 0, Value: math.MaxUint32})
 	assert.PanicsWithValue(
-		t, "time may not be >= MaxUint32", func(){file.Update(math.MaxUint32, 0, 1, 0)})
+		t, "time may not be >= MaxUint32", func() { file.Update(math.MaxUint32, 0, 1, 0) })
 }
 
 func TestFileValidate(t *testing.T) {

+ 1 - 1
internal/core/forks.go

@@ -84,7 +84,7 @@ const (
 )
 
 // planPrintFunc is used to print the execution plan in prepareRunPlan().
-var planPrintFunc = func(args... interface{}) {
+var planPrintFunc = func(args ...interface{}) {
 	fmt.Fprintln(os.Stderr, args...)
 }
 

+ 1 - 1
internal/core/pipeline_test.go

@@ -502,7 +502,7 @@ func TestPipelineDumpPlanConfigure(t *testing.T) {
 	pipeline.Initialize(map[string]interface{}{ConfigPipelineDumpPlan: true})
 	assert.True(t, pipeline.DumpPlan)
 	stream := &bytes.Buffer{}
-	planPrintFunc = func(args... interface{}) {
+	planPrintFunc = func(args ...interface{}) {
 		fmt.Fprintln(stream, args...)
 	}
 	commits := make([]*object.Commit, 1)

+ 3 - 3
internal/plumbing/renames.go

@@ -295,7 +295,7 @@ func (ra *RenameAnalysis) Consume(deps map[string]interface{}) (map[string]inter
 						matchesB,
 						&object.Change{
 							From: deletedBlobsB[d].change.From,
-							To: addedBlobsB[a].change.To})
+							To:   addedBlobsB[a].change.To})
 					break
 				}
 			}
@@ -355,7 +355,7 @@ func (ra *RenameAnalysis) Fork(n int) []core.PipelineItem {
 
 func (ra *RenameAnalysis) sizesAreClose(size1 int64, size2 int64) bool {
 	size := internal.Max64(1, internal.Max64(size1, size2))
-	return (internal.Abs64(size1-size2)*10000)/size <= int64(100-ra.SimilarityThreshold) * 100
+	return (internal.Abs64(size1-size2)*10000)/size <= int64(100-ra.SimilarityThreshold)*100
 }
 
 func (ra *RenameAnalysis) blobsAreClose(
@@ -487,7 +487,7 @@ func sortRenameCandidates(candidates []int, origin string, nameGetter func(int)
 		return distances[i].Distance < distances[j].Distance
 	})
 	for i, cd := range distances {
-		candidates[i]  = cd.Candidate
+		candidates[i] = cd.Candidate
 	}
 }
 

+ 1 - 1
internal/plumbing/renames_test.go

@@ -185,4 +185,4 @@ func TestRenameAnalysisSortRenameCandidates(t *testing.T) {
 	})
 	assert.Equal(t, candidates[0], 3)
 	assert.Equal(t, candidates[1], 1)
-}
+}

+ 32 - 0
internal/rbtree/lz4.go

@@ -0,0 +1,32 @@
+package rbtree
+
+/*
+#cgo CFLAGS: -std=c99
+#include "lz4hc.c"
+*/
+import "C"
+import "unsafe"
+
+// CompressUInt32Slice compresses a slice of uint32-s with LZ4.
+func CompressUInt32Slice(data []uint32) []byte {
+	dstSize := C.LZ4_compressBound(C.int(len(data) * 4))
+	dst := make([]byte, dstSize)
+	dstSize = C.LZ4_compress_HC(
+		unsafe.Pointer(&data[0]),
+		unsafe.Pointer(&dst[0]),
+		C.int(len(data)*4),
+		dstSize,
+		12)
+	finalDst := make([]byte, dstSize)
+	copy(finalDst, dst[:dstSize])
+	return finalDst
+}
+
+// DecompressUInt32Slice decompresses a slice of uint32-s previously compressed with LZ4.
+// `result` must be preallocated.
+func DecompressUInt32Slice(data []byte, result []uint32) {
+	C.LZ4_decompress_fast(
+		unsafe.Pointer(&data[0]),
+		unsafe.Pointer(&result[0]),
+		C.int(len(result)*4))
+}

+ 23 - 0
internal/rbtree/lz4_test.go

@@ -0,0 +1,23 @@
+package rbtree
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestCompressDecompressUInt32Slice(t *testing.T) {
+	data := make([]uint32, 1000)
+	for i := range data {
+		data[i] = 7
+	}
+	packed := CompressUInt32Slice(data)
+	assert.Len(t, packed, 29)
+	for i := range data {
+		data[i] = 0
+	}
+	DecompressUInt32Slice(packed, data)
+	for i := range data {
+		assert.Equal(t, uint32(7), data[i], i)
+	}
+}

Filskillnaden har hållits tillbaka eftersom den är för stor
+ 1828 - 0
internal/rbtree/lz4hc.c


+ 79 - 9
internal/rbtree/rbtree.go

@@ -2,6 +2,7 @@ package rbtree
 
 import (
 	"math"
+	"sync"
 )
 
 //
@@ -16,15 +17,19 @@ type Item struct {
 
 // Allocator is the allocator for nodes in a RBTree.
 type Allocator struct {
-	storage []node
-	gaps map[uint32]bool
+	HibernationThreshold int
+
+	storage           []node
+	gaps              map[uint32]bool
+	hibernatedStorage [6][]byte
+	hibernatedLen     int
 }
 
 // NewAllocator creates a new allocator for RBTree's nodes.
 func NewAllocator() *Allocator {
-	return &Allocator{ 
+	return &Allocator{
 		storage: []node{},
-		gaps: map[uint32]bool{},
+		gaps:    map[uint32]bool{},
 	}
 }
 
@@ -42,7 +47,7 @@ func (allocator Allocator) Used() int {
 func (allocator *Allocator) Clone() *Allocator {
 	newAllocator := &Allocator{
 		storage: make([]node, len(allocator.storage), cap(allocator.storage)),
-		gaps: map[uint32]bool{},
+		gaps:    map[uint32]bool{},
 	}
 	copy(newAllocator.storage, allocator.storage)
 	for key, val := range allocator.gaps {
@@ -51,6 +56,71 @@ func (allocator *Allocator) Clone() *Allocator {
 	return newAllocator
 }
 
+// Hibernate compresses the allocated memory.
+func (allocator *Allocator) Hibernate() {
+	if allocator.HibernationThreshold == 0 || len(allocator.storage) < allocator.HibernationThreshold {
+		return
+	}
+	allocator.hibernatedLen = len(allocator.storage)
+	buffers := [6][]uint32{}
+	for i := 0; i < len(buffers); i++ {
+		buffers[i] = make([]uint32, len(allocator.storage))
+	}
+	// we deinterleave to achieve a better compression ratio
+	for i, n := range allocator.storage {
+		buffers[0][i] = n.item.Key
+		buffers[1][i] = n.item.Value
+		buffers[2][i] = n.left
+		buffers[3][i] = n.parent
+		buffers[4][i] = n.right
+		if n.color {
+			buffers[5][i] = 1
+		}
+	}
+	allocator.storage = nil
+	wg := &sync.WaitGroup{}
+	wg.Add(len(buffers))
+	for i, buffer := range buffers {
+		go func(i int, buffer []uint32) {
+			allocator.hibernatedStorage[i] = CompressUInt32Slice(buffer)
+			buffers[i] = nil
+			wg.Done()
+		}(i, buffer)
+	}
+	wg.Wait()
+}
+
+// Boot performs the opposite of Hibernate() - decompresses and restores the allocated memory.
+func (allocator *Allocator) Boot() {
+	if allocator.hibernatedLen == 0 {
+		// not hibernated
+		return
+	}
+	buffers := [6][]uint32{}
+	wg := &sync.WaitGroup{}
+	wg.Add(len(buffers))
+	for i := 0; i < len(buffers); i++ {
+		go func(i int) {
+			buffers[i] = make([]uint32, allocator.hibernatedLen)
+			DecompressUInt32Slice(allocator.hibernatedStorage[i], buffers[i])
+			allocator.hibernatedStorage[i] = nil
+			wg.Done()
+		}(i)
+	}
+	wg.Wait()
+	allocator.storage = make([]node, allocator.hibernatedLen, (allocator.hibernatedLen*3)/2)
+	for i := range allocator.storage {
+		n := &allocator.storage[i]
+		n.item.Key = buffers[0][i]
+		n.item.Value = buffers[1][i]
+		n.left = buffers[2][i]
+		n.parent = buffers[3][i]
+		n.right = buffers[4][i]
+		n.color = buffers[5][i] > 0
+	}
+	allocator.hibernatedLen = 0
+}
+
 func (allocator *Allocator) malloc() uint32 {
 	if len(allocator.gaps) > 0 {
 		var key uint32
@@ -66,7 +136,7 @@ func (allocator *Allocator) malloc() uint32 {
 		allocator.storage = append(allocator.storage, node{})
 		n = 1
 	}
-	if n == negativeLimitNode - 1 {
+	if n == negativeLimitNode-1 {
 		// math.MaxUint32 is reserved
 		panic("the size of my RBTree allocator has reached the maximum value for uint32, sorry")
 	}
@@ -133,7 +203,7 @@ func (tree RBTree) CloneShallow(allocator *Allocator) *RBTree {
 // CloneDeep performs a deep copy of the tree - the nodes are created from scratch.
 func (tree RBTree) CloneDeep(allocator *Allocator) *RBTree {
 	clone := &RBTree{
-		count: tree.count,
+		count:     tree.count,
 		allocator: allocator,
 	}
 	nodeMap := map[uint32]uint32{0: 0}
@@ -402,8 +472,8 @@ func doAssert(b bool) {
 }
 
 const (
-	red = false
-	black = true
+	red               = false
+	black             = true
 	negativeLimitNode = math.MaxUint32
 )
 

+ 7 - 4
internal/rbtree/rbtree_test.go

@@ -9,7 +9,6 @@ import (
 	"github.com/stretchr/testify/assert"
 )
 
-
 // Create a tree storing a set of integers
 func testNewIntSet() *RBTree {
 	return NewRBTree(NewAllocator())
@@ -80,7 +79,9 @@ func TestDelete(t *testing.T) {
 func iterToString(i Iterator) string {
 	s := ""
 	for ; !i.Limit(); i = i.Next() {
-		if s != "" { s = s + ","}
+		if s != "" {
+			s = s + ","
+		}
 		s = s + fmt.Sprintf("%d", i.Item().Key)
 	}
 	return s
@@ -89,7 +90,9 @@ func iterToString(i Iterator) string {
 func reverseIterToString(i Iterator) string {
 	s := ""
 	for ; !i.NegativeLimit(); i = i.Prev() {
-		if s != "" { s = s + ","}
+		if s != "" {
+			s = s + ","
+		}
 		s = s + fmt.Sprintf("%d", i.Item().Key)
 	}
 	return s
@@ -214,7 +217,7 @@ func (oiter oracleIterator) NegativeLimit() bool {
 }
 
 func (oiter oracleIterator) Max() bool {
-	return oiter.index == len(oiter.o.data) - 1
+	return oiter.index == len(oiter.o.data)-1
 }
 
 func (oiter oracleIterator) Item() int {

+ 4 - 4
leaves/burndown_test.go

@@ -1173,8 +1173,8 @@ func TestBurndownDeserialize(t *testing.T) {
 
 func TestBurndownEmptyFileHistory(t *testing.T) {
 	burndown := &BurndownAnalysis{
-		Sampling: 30,
-		Granularity: 30,
+		Sampling:      30,
+		Granularity:   30,
 		globalHistory: sparseHistory{0: map[int]int64{0: 10}},
 		fileHistories: map[string]sparseHistory{"test.go": {}},
 	}
@@ -1188,8 +1188,8 @@ func TestBurndownEmptyFileHistory(t *testing.T) {
 
 func TestBurndownNegativePeople(t *testing.T) {
 	burndown := &BurndownAnalysis{
-		Sampling:    30,
-		Granularity: 30,
+		Sampling:     30,
+		Granularity:  30,
 		PeopleNumber: -1,
 	}
 	err := burndown.Initialize(test.Repository)

+ 5 - 5
leaves/devs.go

@@ -49,7 +49,7 @@ type DevDay struct {
 	// Commits is the number of commits made by a particular developer in a particular day.
 	Commits int
 	// Added is the number of added lines by a particular developer in a particular day.
-	Added   int
+	Added int
 	// Removed is the number of removed lines by a particular developer in a particular day.
 	Removed int
 	// Changed is the number of changed lines by a particular developer in a particular day.
@@ -208,7 +208,7 @@ func (devs *DevsAnalysis) Consume(deps map[string]interface{}) (map[string]inter
 // Finalize returns the result of the analysis. Further Consume() calls are not expected.
 func (devs *DevsAnalysis) Finalize() interface{} {
 	return DevsResult{
-		Days: devs.days,
+		Days:               devs.days,
 		reversedPeopleDict: devs.reversedPeopleDict,
 	}
 }
@@ -253,7 +253,7 @@ func (devs *DevsAnalysis) Deserialize(pbmessage []byte) (interface{}, error) {
 		}
 	}
 	result := DevsResult{
-		Days: days,
+		Days:               days,
 		reversedPeopleDict: message.DevIndex,
 	}
 	return result, nil
@@ -270,10 +270,10 @@ func (devs *DevsAnalysis) MergeResults(r1, r2 interface{}, c1, c2 *core.CommonAn
 	}
 	devIndex := map[string]devIndexPair{}
 	for dev, devName := range cr1.reversedPeopleDict {
-		devIndex[devName] = devIndexPair{Index1: dev+1, Index2: devIndex[devName].Index2}
+		devIndex[devName] = devIndexPair{Index1: dev + 1, Index2: devIndex[devName].Index2}
 	}
 	for dev, devName := range cr2.reversedPeopleDict {
-		devIndex[devName] = devIndexPair{Index1: devIndex[devName].Index1, Index2: dev+1}
+		devIndex[devName] = devIndexPair{Index1: devIndex[devName].Index1, Index2: dev + 1}
 	}
 	jointDevSeq := make([]string, len(devIndex))
 	{

+ 7 - 7
leaves/devs_test.go

@@ -284,7 +284,7 @@ func TestDevsMergeResults(t *testing.T) {
 	people1 := [...]string{"1@srcd", "2@srcd"}
 	people2 := [...]string{"3@srcd", "1@srcd"}
 	r1 := DevsResult{
-		Days: map[int]map[int]*DevDay{},
+		Days:               map[int]map[int]*DevDay{},
 		reversedPeopleDict: people1[:],
 	}
 	r1.Days[1] = map[int]*DevDay{}
@@ -296,7 +296,7 @@ func TestDevsMergeResults(t *testing.T) {
 	r1.Days[11] = map[int]*DevDay{}
 	r1.Days[11][1] = &DevDay{10, 20, 30, 40}
 	r2 := DevsResult{
-		Days: map[int]map[int]*DevDay{},
+		Days:               map[int]map[int]*DevDay{},
 		reversedPeopleDict: people2[:],
 	}
 	r2.Days[1] = map[int]*DevDay{}
@@ -317,7 +317,7 @@ func TestDevsMergeResults(t *testing.T) {
 	assert.Equal(t, rm.Days[11], map[int]*DevDay{1: {10, 20, 30, 40}})
 	assert.Equal(t, rm.Days[2], map[int]*DevDay{
 		identity.AuthorMissing: {100, 200, 300, 400},
-		2: {11, 21, 31, 41},
+		2:                      {11, 21, 31, 41},
 	})
 	assert.Equal(t, rm.Days[1], map[int]*DevDay{
 		0: {11, 22, 33, 44},
@@ -325,8 +325,8 @@ func TestDevsMergeResults(t *testing.T) {
 		2: {10, 20, 30, 40},
 	})
 	assert.Equal(t, rm.Days[10], map[int]*DevDay{
-		0: {11, 21, 31, 41},
-		2: {11, 21, 31, 41},
-		identity.AuthorMissing: {100*2, 200*2, 300*2, 400*2},
+		0:                      {11, 21, 31, 41},
+		2:                      {11, 21, 31, 41},
+		identity.AuthorMissing: {100 * 2, 200 * 2, 300 * 2, 400 * 2},
 	})
-}
+}