|
@@ -0,0 +1,302 @@
|
|
|
+package plumbing
|
|
|
+
|
|
|
+// Adapted from https://github.com/kr/binarydist
|
|
|
+// Original license:
|
|
|
+//
|
|
|
+// Copyright 2012 Keith Rarick
|
|
|
+//
|
|
|
+// Permission is hereby granted, free of charge, to any person
|
|
|
+// obtaining a copy of this software and associated documentation
|
|
|
+// files (the "Software"), to deal in the Software without
|
|
|
+// restriction, including without limitation the rights to use,
|
|
|
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
+// copies of the Software, and to permit persons to whom the
|
|
|
+// Software is furnished to do so, subject to the following
|
|
|
+// conditions:
|
|
|
+//
|
|
|
+// The above copyright notice and this permission notice shall be
|
|
|
+// included in all copies or substantial portions of the Software.
|
|
|
+//
|
|
|
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
|
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
|
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
|
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
|
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
|
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
|
+// OTHER DEALINGS IN THE SOFTWARE.
|
|
|
+
|
|
|
+import (
|
|
|
+ "bytes"
|
|
|
+)
|
|
|
+
|
|
|
+func swap(a []int, i, j int) { a[i], a[j] = a[j], a[i] }
|
|
|
+
|
|
|
+func split(I, V []int, start, length, h int) {
|
|
|
+ var i, j, k, x, jj, kk int
|
|
|
+
|
|
|
+ if length < 16 {
|
|
|
+ for k = start; k < start+length; k += j {
|
|
|
+ j = 1
|
|
|
+ x = V[I[k]+h]
|
|
|
+ for i = 1; k+i < start+length; i++ {
|
|
|
+ if V[I[k+i]+h] < x {
|
|
|
+ x = V[I[k+i]+h]
|
|
|
+ j = 0
|
|
|
+ }
|
|
|
+ if V[I[k+i]+h] == x {
|
|
|
+ swap(I, k+i, k+j)
|
|
|
+ j++
|
|
|
+ }
|
|
|
+ }
|
|
|
+ for i = 0; i < j; i++ {
|
|
|
+ V[I[k+i]] = k + j - 1
|
|
|
+ }
|
|
|
+ if j == 1 {
|
|
|
+ I[k] = -1
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ x = V[I[start+length/2]+h]
|
|
|
+ jj = 0
|
|
|
+ kk = 0
|
|
|
+ for i = start; i < start+length; i++ {
|
|
|
+ if V[I[i]+h] < x {
|
|
|
+ jj++
|
|
|
+ }
|
|
|
+ if V[I[i]+h] == x {
|
|
|
+ kk++
|
|
|
+ }
|
|
|
+ }
|
|
|
+ jj += start
|
|
|
+ kk += jj
|
|
|
+
|
|
|
+ i = start
|
|
|
+ j = 0
|
|
|
+ k = 0
|
|
|
+ for i < jj {
|
|
|
+ if V[I[i]+h] < x {
|
|
|
+ i++
|
|
|
+ } else if V[I[i]+h] == x {
|
|
|
+ swap(I, i, jj+j)
|
|
|
+ j++
|
|
|
+ } else {
|
|
|
+ swap(I, i, kk+k)
|
|
|
+ k++
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ for jj+j < kk {
|
|
|
+ if V[I[jj+j]+h] == x {
|
|
|
+ j++
|
|
|
+ } else {
|
|
|
+ swap(I, jj+j, kk+k)
|
|
|
+ k++
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if jj > start {
|
|
|
+ split(I, V, start, jj-start, h)
|
|
|
+ }
|
|
|
+
|
|
|
+ for i = 0; i < kk-jj; i++ {
|
|
|
+ V[I[jj+i]] = kk - 1
|
|
|
+ }
|
|
|
+ if jj == kk-1 {
|
|
|
+ I[jj] = -1
|
|
|
+ }
|
|
|
+
|
|
|
+ if start+length > kk {
|
|
|
+ split(I, V, kk, start+length-kk, h)
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+func qsufsort(obuf []byte) []int {
|
|
|
+ var buckets [256]int
|
|
|
+ var i, h int
|
|
|
+ I := make([]int, len(obuf)+1)
|
|
|
+ V := make([]int, len(obuf)+1)
|
|
|
+
|
|
|
+ for _, c := range obuf {
|
|
|
+ buckets[c]++
|
|
|
+ }
|
|
|
+ for i = 1; i < 256; i++ {
|
|
|
+ buckets[i] += buckets[i-1]
|
|
|
+ }
|
|
|
+ copy(buckets[1:], buckets[:])
|
|
|
+ buckets[0] = 0
|
|
|
+
|
|
|
+ for i, c := range obuf {
|
|
|
+ buckets[c]++
|
|
|
+ I[buckets[c]] = i
|
|
|
+ }
|
|
|
+
|
|
|
+ I[0] = len(obuf)
|
|
|
+ for i, c := range obuf {
|
|
|
+ V[i] = buckets[c]
|
|
|
+ }
|
|
|
+
|
|
|
+ V[len(obuf)] = 0
|
|
|
+ for i = 1; i < 256; i++ {
|
|
|
+ if buckets[i] == buckets[i-1]+1 {
|
|
|
+ I[buckets[i]] = -1
|
|
|
+ }
|
|
|
+ }
|
|
|
+ I[0] = -1
|
|
|
+
|
|
|
+ for h = 1; I[0] != -(len(obuf) + 1); h += h {
|
|
|
+ var n int
|
|
|
+ for i = 0; i < len(obuf)+1; {
|
|
|
+ if I[i] < 0 {
|
|
|
+ n -= I[i]
|
|
|
+ i -= I[i]
|
|
|
+ } else {
|
|
|
+ if n != 0 {
|
|
|
+ I[i-n] = -n
|
|
|
+ }
|
|
|
+ n = V[I[i]] + 1 - i
|
|
|
+ split(I, V, i, n, h)
|
|
|
+ i += n
|
|
|
+ n = 0
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if n != 0 {
|
|
|
+ I[i-n] = -n
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ for i = 0; i < len(obuf)+1; i++ {
|
|
|
+ I[V[i]] = i
|
|
|
+ }
|
|
|
+ return I
|
|
|
+}
|
|
|
+
|
|
|
+func matchlen(a, b []byte) (i int) {
|
|
|
+ for i < len(a) && i < len(b) && a[i] == b[i] {
|
|
|
+ i++
|
|
|
+ }
|
|
|
+ return i
|
|
|
+}
|
|
|
+
|
|
|
+func search(I []int, obuf, nbuf []byte, st, en int) (pos, n int) {
|
|
|
+ if en-st < 2 {
|
|
|
+ x := matchlen(obuf[I[st]:], nbuf)
|
|
|
+ y := matchlen(obuf[I[en]:], nbuf)
|
|
|
+
|
|
|
+ if x > y {
|
|
|
+ return I[st], x
|
|
|
+ }
|
|
|
+ return I[en], y
|
|
|
+ }
|
|
|
+
|
|
|
+ x := st + (en-st)/2
|
|
|
+ if bytes.Compare(obuf[I[x]:], nbuf) < 0 {
|
|
|
+ return search(I, obuf, nbuf, x, en)
|
|
|
+ }
|
|
|
+ return search(I, obuf, nbuf, st, x)
|
|
|
+}
|
|
|
+
|
|
|
+// DiffBytes calculates the approximated number of different bytes between two binary buffers.
|
|
|
+// We are not interested in the diff script itself. Instead, we track the sizes of `db` and `eb`
|
|
|
+// from the original implementation.
|
|
|
+func DiffBytes(obuf, nbuf []byte) int {
|
|
|
+ if len(nbuf) < len(obuf) {
|
|
|
+ obuf, nbuf = nbuf, obuf
|
|
|
+ }
|
|
|
+ var lenf int
|
|
|
+ I := qsufsort(obuf)
|
|
|
+ var dblen, eblen int
|
|
|
+
|
|
|
+ // Compute the differences, writing ctrl as we go
|
|
|
+ var scan, pos, length int
|
|
|
+ var lastscan, lastpos, lastoffset int
|
|
|
+ for scan < len(nbuf) {
|
|
|
+ var oldscore int
|
|
|
+ scan += length
|
|
|
+ for scsc := scan; scan < len(nbuf); scan++ {
|
|
|
+ pos, length = search(I, obuf, nbuf[scan:], 0, len(obuf))
|
|
|
+
|
|
|
+ for ; scsc < scan+length; scsc++ {
|
|
|
+ if scsc+lastoffset < len(obuf) &&
|
|
|
+ obuf[scsc+lastoffset] == nbuf[scsc] {
|
|
|
+ oldscore++
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if (length == oldscore && length != 0) || length > oldscore+8 {
|
|
|
+ break
|
|
|
+ }
|
|
|
+
|
|
|
+ if scan+lastoffset < len(obuf) && obuf[scan+lastoffset] == nbuf[scan] {
|
|
|
+ oldscore--
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if length != oldscore || scan == len(nbuf) {
|
|
|
+ var s, Sf int
|
|
|
+ lenf = 0
|
|
|
+ for i := 0; lastscan+i < scan && lastpos+i < len(obuf); {
|
|
|
+ if obuf[lastpos+i] == nbuf[lastscan+i] {
|
|
|
+ s++
|
|
|
+ }
|
|
|
+ i++
|
|
|
+ if s*2-i > Sf*2-lenf {
|
|
|
+ Sf = s
|
|
|
+ lenf = i
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ lenb := 0
|
|
|
+ if scan < len(nbuf) {
|
|
|
+ var s, Sb int
|
|
|
+ for i := 1; (scan >= lastscan+i) && (pos >= i); i++ {
|
|
|
+ if obuf[pos-i] == nbuf[scan-i] {
|
|
|
+ s++
|
|
|
+ }
|
|
|
+ if s*2-i > Sb*2-lenb {
|
|
|
+ Sb = s
|
|
|
+ lenb = i
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if lastscan+lenf > scan-lenb {
|
|
|
+ overlap := (lastscan + lenf) - (scan - lenb)
|
|
|
+ s := 0
|
|
|
+ Ss := 0
|
|
|
+ lens := 0
|
|
|
+ for i := 0; i < overlap; i++ {
|
|
|
+ if nbuf[lastscan+lenf-overlap+i] == obuf[lastpos+lenf-overlap+i] {
|
|
|
+ s++
|
|
|
+ }
|
|
|
+ if nbuf[scan-lenb+i] == obuf[pos-lenb+i] {
|
|
|
+ s--
|
|
|
+ }
|
|
|
+ if s > Ss {
|
|
|
+ Ss = s
|
|
|
+ lens = i + 1
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ lenf += lens - overlap
|
|
|
+ lenb -= lens
|
|
|
+ }
|
|
|
+
|
|
|
+ var nonzero int
|
|
|
+ for i := 0; i < lenf; i++ {
|
|
|
+ if nbuf[lastscan+i]-obuf[lastpos+i] != 0 {
|
|
|
+ nonzero++
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ dblen += nonzero
|
|
|
+ eblen += (scan - lenb) - (lastscan + lenf)
|
|
|
+ lastscan = scan - lenb
|
|
|
+ lastpos = pos - lenb
|
|
|
+ lastoffset = pos - scan
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return dblen + eblen
|
|
|
+}
|