123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303 |
- package plumbing
- // Adapted from https://github.com/kr/binarydist
- // Original license:
- //
- // Copyright 2012 Keith Rarick
- //
- // Permission is hereby granted, free of charge, to any person
- // obtaining a copy of this software and associated documentation
- // files (the "Software"), to deal in the Software without
- // restriction, including without limitation the rights to use,
- // copy, modify, merge, publish, distribute, sublicense, and/or sell
- // copies of the Software, and to permit persons to whom the
- // Software is furnished to do so, subject to the following
- // conditions:
- //
- // The above copyright notice and this permission notice shall be
- // included in all copies or substantial portions of the Software.
- //
- // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- // OTHER DEALINGS IN THE SOFTWARE.
- import (
- "bytes"
- )
- func swap(a []int, i, j int) { a[i], a[j] = a[j], a[i] }
- func split(I, V []int, start, length, h int) {
- var i, j, k, x, jj, kk int
- if length < 16 {
- for k = start; k < start+length; k += j {
- j = 1
- x = V[I[k]+h]
- for i = 1; k+i < start+length; i++ {
- if V[I[k+i]+h] < x {
- x = V[I[k+i]+h]
- j = 0
- }
- if V[I[k+i]+h] == x {
- swap(I, k+i, k+j)
- j++
- }
- }
- for i = 0; i < j; i++ {
- V[I[k+i]] = k + j - 1
- }
- if j == 1 {
- I[k] = -1
- }
- }
- return
- }
- x = V[I[start+length/2]+h]
- jj = 0
- kk = 0
- for i = start; i < start+length; i++ {
- if V[I[i]+h] < x {
- jj++
- }
- if V[I[i]+h] == x {
- kk++
- }
- }
- jj += start
- kk += jj
- i = start
- j = 0
- k = 0
- for i < jj {
- if V[I[i]+h] < x {
- i++
- } else if V[I[i]+h] == x {
- swap(I, i, jj+j)
- j++
- } else {
- swap(I, i, kk+k)
- k++
- }
- }
- for jj+j < kk {
- if V[I[jj+j]+h] == x {
- j++
- } else {
- swap(I, jj+j, kk+k)
- k++
- }
- }
- if jj > start {
- split(I, V, start, jj-start, h)
- }
- for i = 0; i < kk-jj; i++ {
- V[I[jj+i]] = kk - 1
- }
- if jj == kk-1 {
- I[jj] = -1
- }
- if start+length > kk {
- split(I, V, kk, start+length-kk, h)
- }
- }
- func qsufsort(obuf []byte) []int {
- var buckets [256]int
- var i, h int
- I := make([]int, len(obuf)+1)
- V := make([]int, len(obuf)+1)
- for _, c := range obuf {
- buckets[c]++
- }
- for i = 1; i < 256; i++ {
- buckets[i] += buckets[i-1]
- }
- copy(buckets[1:], buckets[:])
- buckets[0] = 0
- for i, c := range obuf {
- buckets[c]++
- I[buckets[c]] = i
- }
- I[0] = len(obuf)
- for i, c := range obuf {
- V[i] = buckets[c]
- }
- V[len(obuf)] = 0
- for i = 1; i < 256; i++ {
- if buckets[i] == buckets[i-1]+1 {
- I[buckets[i]] = -1
- }
- }
- I[0] = -1
- for h = 1; I[0] != -(len(obuf) + 1); h += h {
- var n int
- for i = 0; i < len(obuf)+1; {
- if I[i] < 0 {
- n -= I[i]
- i -= I[i]
- } else {
- if n != 0 {
- I[i-n] = -n
- }
- n = V[I[i]] + 1 - i
- split(I, V, i, n, h)
- i += n
- n = 0
- }
- }
- if n != 0 {
- I[i-n] = -n
- }
- }
- for i = 0; i < len(obuf)+1; i++ {
- I[V[i]] = i
- }
- return I
- }
- func matchlen(a, b []byte) (i int) {
- for i < len(a) && i < len(b) && a[i] == b[i] {
- i++
- }
- return i
- }
- func search(I []int, obuf, nbuf []byte, st, en int) (pos, n int) {
- if en-st < 2 {
- x := matchlen(obuf[I[st]:], nbuf)
- y := matchlen(obuf[I[en]:], nbuf)
- if x > y {
- return I[st], x
- }
- return I[en], y
- }
- x := st + (en-st)/2
- if bytes.Compare(obuf[I[x]:], nbuf) < 0 {
- return search(I, obuf, nbuf, x, en)
- }
- return search(I, obuf, nbuf, st, x)
- }
- // DiffBytes calculates the approximated number of different bytes between two binary buffers.
- // We are not interested in the diff script itself. Instead, we track the sizes of `db` and `eb`
- // from the original implementation.
- func DiffBytes(obuf, nbuf []byte) int {
- if len(nbuf) < len(obuf) {
- obuf, nbuf = nbuf, obuf
- }
- var lenf int
- I := qsufsort(obuf)
- var dblen, eblen int
- // Compute the differences, writing ctrl as we go
- var scan, pos, length int
- var lastscan, lastpos, lastoffset int
- for scan < len(nbuf) {
- var oldscore int
- scan += length
- for scsc := scan; scan < len(nbuf); scan++ {
- pos, length = search(I, obuf, nbuf[scan:], 0, len(obuf))
- for ; scsc < scan+length; scsc++ {
- if scsc+lastoffset < len(obuf) &&
- obuf[scsc+lastoffset] == nbuf[scsc] {
- oldscore++
- }
- }
- if (length == oldscore && length != 0) || length > oldscore+8 {
- break
- }
- if scan+lastoffset < len(obuf) && obuf[scan+lastoffset] == nbuf[scan] {
- oldscore--
- }
- }
- if length != oldscore || scan == len(nbuf) {
- var s, Sf int
- lenf = 0
- for i := 0; lastscan+i < scan && lastpos+i < len(obuf); {
- if obuf[lastpos+i] == nbuf[lastscan+i] {
- s++
- }
- i++
- if s*2-i > Sf*2-lenf {
- Sf = s
- lenf = i
- }
- }
- lenb := 0
- if scan < len(nbuf) {
- var s, Sb int
- for i := 1; (scan >= lastscan+i) && (pos >= i); i++ {
- if obuf[pos-i] == nbuf[scan-i] {
- s++
- }
- if s*2-i > Sb*2-lenb {
- Sb = s
- lenb = i
- }
- }
- }
- if lastscan+lenf > scan-lenb {
- overlap := (lastscan + lenf) - (scan - lenb)
- s := 0
- Ss := 0
- lens := 0
- for i := 0; i < overlap; i++ {
- if nbuf[lastscan+lenf-overlap+i] == obuf[lastpos+lenf-overlap+i] {
- s++
- }
- if nbuf[scan-lenb+i] == obuf[pos-lenb+i] {
- s--
- }
- if s > Ss {
- Ss = s
- lens = i + 1
- }
- }
- lenf += lens - overlap
- lenb -= lens
- }
- var nonzero int
- for i := 0; i < lenf; i++ {
- if nbuf[lastscan+i]-obuf[lastpos+i] != 0 {
- nonzero++
- }
- }
- dblen += nonzero
- eblen += (scan - lenb) - (lastscan + lenf)
- lastscan = scan - lenb
- lastpos = pos - lenb
- lastoffset = pos - scan
- }
- }
- return dblen + eblen
- }
|