bsdiff.go 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303
  1. package plumbing
  2. // Adapted from https://github.com/kr/binarydist
  3. // Original license:
  4. //
  5. // Copyright 2012 Keith Rarick
  6. //
  7. // Permission is hereby granted, free of charge, to any person
  8. // obtaining a copy of this software and associated documentation
  9. // files (the "Software"), to deal in the Software without
  10. // restriction, including without limitation the rights to use,
  11. // copy, modify, merge, publish, distribute, sublicense, and/or sell
  12. // copies of the Software, and to permit persons to whom the
  13. // Software is furnished to do so, subject to the following
  14. // conditions:
  15. //
  16. // The above copyright notice and this permission notice shall be
  17. // included in all copies or substantial portions of the Software.
  18. //
  19. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  20. // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  21. // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  22. // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  23. // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  24. // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  25. // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  26. // OTHER DEALINGS IN THE SOFTWARE.
  27. import (
  28. "bytes"
  29. )
  30. func swap(a []int, i, j int) { a[i], a[j] = a[j], a[i] }
  31. func split(I, V []int, start, length, h int) {
  32. var i, j, k, x, jj, kk int
  33. if length < 16 {
  34. for k = start; k < start+length; k += j {
  35. j = 1
  36. x = V[I[k]+h]
  37. for i = 1; k+i < start+length; i++ {
  38. if V[I[k+i]+h] < x {
  39. x = V[I[k+i]+h]
  40. j = 0
  41. }
  42. if V[I[k+i]+h] == x {
  43. swap(I, k+i, k+j)
  44. j++
  45. }
  46. }
  47. for i = 0; i < j; i++ {
  48. V[I[k+i]] = k + j - 1
  49. }
  50. if j == 1 {
  51. I[k] = -1
  52. }
  53. }
  54. return
  55. }
  56. x = V[I[start+length/2]+h]
  57. jj = 0
  58. kk = 0
  59. for i = start; i < start+length; i++ {
  60. if V[I[i]+h] < x {
  61. jj++
  62. }
  63. if V[I[i]+h] == x {
  64. kk++
  65. }
  66. }
  67. jj += start
  68. kk += jj
  69. i = start
  70. j = 0
  71. k = 0
  72. for i < jj {
  73. if V[I[i]+h] < x {
  74. i++
  75. } else if V[I[i]+h] == x {
  76. swap(I, i, jj+j)
  77. j++
  78. } else {
  79. swap(I, i, kk+k)
  80. k++
  81. }
  82. }
  83. for jj+j < kk {
  84. if V[I[jj+j]+h] == x {
  85. j++
  86. } else {
  87. swap(I, jj+j, kk+k)
  88. k++
  89. }
  90. }
  91. if jj > start {
  92. split(I, V, start, jj-start, h)
  93. }
  94. for i = 0; i < kk-jj; i++ {
  95. V[I[jj+i]] = kk - 1
  96. }
  97. if jj == kk-1 {
  98. I[jj] = -1
  99. }
  100. if start+length > kk {
  101. split(I, V, kk, start+length-kk, h)
  102. }
  103. }
  104. func qsufsort(obuf []byte) []int {
  105. var buckets [256]int
  106. var i, h int
  107. I := make([]int, len(obuf)+1)
  108. V := make([]int, len(obuf)+1)
  109. for _, c := range obuf {
  110. buckets[c]++
  111. }
  112. for i = 1; i < 256; i++ {
  113. buckets[i] += buckets[i-1]
  114. }
  115. copy(buckets[1:], buckets[:])
  116. buckets[0] = 0
  117. for i, c := range obuf {
  118. buckets[c]++
  119. I[buckets[c]] = i
  120. }
  121. I[0] = len(obuf)
  122. for i, c := range obuf {
  123. V[i] = buckets[c]
  124. }
  125. V[len(obuf)] = 0
  126. for i = 1; i < 256; i++ {
  127. if buckets[i] == buckets[i-1]+1 {
  128. I[buckets[i]] = -1
  129. }
  130. }
  131. I[0] = -1
  132. for h = 1; I[0] != -(len(obuf) + 1); h += h {
  133. var n int
  134. for i = 0; i < len(obuf)+1; {
  135. if I[i] < 0 {
  136. n -= I[i]
  137. i -= I[i]
  138. } else {
  139. if n != 0 {
  140. I[i-n] = -n
  141. }
  142. n = V[I[i]] + 1 - i
  143. split(I, V, i, n, h)
  144. i += n
  145. n = 0
  146. }
  147. }
  148. if n != 0 {
  149. I[i-n] = -n
  150. }
  151. }
  152. for i = 0; i < len(obuf)+1; i++ {
  153. I[V[i]] = i
  154. }
  155. return I
  156. }
  157. func matchlen(a, b []byte) (i int) {
  158. for i < len(a) && i < len(b) && a[i] == b[i] {
  159. i++
  160. }
  161. return i
  162. }
  163. func search(I []int, obuf, nbuf []byte, st, en int) (pos, n int) {
  164. if en-st < 2 {
  165. x := matchlen(obuf[I[st]:], nbuf)
  166. y := matchlen(obuf[I[en]:], nbuf)
  167. if x > y {
  168. return I[st], x
  169. }
  170. return I[en], y
  171. }
  172. x := st + (en-st)/2
  173. if bytes.Compare(obuf[I[x]:], nbuf) < 0 {
  174. return search(I, obuf, nbuf, x, en)
  175. }
  176. return search(I, obuf, nbuf, st, x)
  177. }
  178. // DiffBytes calculates the approximated number of different bytes between two binary buffers.
  179. // We are not interested in the diff script itself. Instead, we track the sizes of `db` and `eb`
  180. // from the original implementation.
  181. func DiffBytes(obuf, nbuf []byte) int {
  182. if len(nbuf) < len(obuf) {
  183. obuf, nbuf = nbuf, obuf
  184. }
  185. var lenf int
  186. I := qsufsort(obuf)
  187. var dblen, eblen int
  188. // Compute the differences, writing ctrl as we go
  189. var scan, pos, length int
  190. var lastscan, lastpos, lastoffset int
  191. for scan < len(nbuf) {
  192. var oldscore int
  193. scan += length
  194. for scsc := scan; scan < len(nbuf); scan++ {
  195. pos, length = search(I, obuf, nbuf[scan:], 0, len(obuf))
  196. for ; scsc < scan+length; scsc++ {
  197. if scsc+lastoffset < len(obuf) &&
  198. obuf[scsc+lastoffset] == nbuf[scsc] {
  199. oldscore++
  200. }
  201. }
  202. if (length == oldscore && length != 0) || length > oldscore+8 {
  203. break
  204. }
  205. if scan+lastoffset < len(obuf) && obuf[scan+lastoffset] == nbuf[scan] {
  206. oldscore--
  207. }
  208. }
  209. if length != oldscore || scan == len(nbuf) {
  210. var s, Sf int
  211. lenf = 0
  212. for i := 0; lastscan+i < scan && lastpos+i < len(obuf); {
  213. if obuf[lastpos+i] == nbuf[lastscan+i] {
  214. s++
  215. }
  216. i++
  217. if s*2-i > Sf*2-lenf {
  218. Sf = s
  219. lenf = i
  220. }
  221. }
  222. lenb := 0
  223. if scan < len(nbuf) {
  224. var s, Sb int
  225. for i := 1; (scan >= lastscan+i) && (pos >= i); i++ {
  226. if obuf[pos-i] == nbuf[scan-i] {
  227. s++
  228. }
  229. if s*2-i > Sb*2-lenb {
  230. Sb = s
  231. lenb = i
  232. }
  233. }
  234. }
  235. if lastscan+lenf > scan-lenb {
  236. overlap := (lastscan + lenf) - (scan - lenb)
  237. s := 0
  238. Ss := 0
  239. lens := 0
  240. for i := 0; i < overlap; i++ {
  241. if nbuf[lastscan+lenf-overlap+i] == obuf[lastpos+lenf-overlap+i] {
  242. s++
  243. }
  244. if nbuf[scan-lenb+i] == obuf[pos-lenb+i] {
  245. s--
  246. }
  247. if s > Ss {
  248. Ss = s
  249. lens = i + 1
  250. }
  251. }
  252. lenf += lens - overlap
  253. lenb -= lens
  254. }
  255. var nonzero int
  256. for i := 0; i < lenf; i++ {
  257. if nbuf[lastscan+i]-obuf[lastpos+i] != 0 {
  258. nonzero++
  259. }
  260. }
  261. dblen += nonzero
  262. eblen += (scan - lenb) - (lastscan + lenf)
  263. lastscan = scan - lenb
  264. lastpos = pos - lenb
  265. lastoffset = pos - scan
  266. }
  267. }
  268. return dblen + eblen
  269. }