file.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394
  1. package burndown
  2. import (
  3. "fmt"
  4. "log"
  5. "math"
  6. "gopkg.in/src-d/hercules.v10/internal"
  7. "gopkg.in/src-d/hercules.v10/internal/rbtree"
  8. )
  9. // Updater is the function which is called back on File.Update().
  10. type Updater = func(currentTime, previousTime, delta int)
  11. // File encapsulates a balanced binary tree to store line intervals and
  12. // a cumulative mapping of values to the corresponding length counters. Users
  13. // are not supposed to create File-s directly; instead, they should call NewFile().
  14. // NewFileFromTree() is the special constructor which is useful in the tests.
  15. //
  16. // Len() returns the number of lines in File.
  17. //
  18. // Update() mutates File by introducing tree structural changes and updating the
  19. // length mapping.
  20. //
  21. // Dump() writes the tree to a string and Validate() checks the tree integrity.
  22. type File struct {
  23. tree *rbtree.RBTree
  24. updaters []Updater
  25. }
  26. // TreeEnd denotes the value of the last leaf in the tree.
  27. const TreeEnd = math.MaxUint32
  28. // TreeMaxBinPower is the binary power value which corresponds to the maximum tick which
  29. // can be stored in the tree.
  30. const TreeMaxBinPower = 14
  31. // TreeMergeMark is the special day which disables the status updates and is used in File.Merge().
  32. const TreeMergeMark = (1 << TreeMaxBinPower) - 1
  33. func (file *File) updateTime(currentTime, previousTime, delta int) {
  34. if previousTime&TreeMergeMark == TreeMergeMark {
  35. if currentTime == previousTime {
  36. return
  37. }
  38. panic("previousTime cannot be TreeMergeMark")
  39. }
  40. if currentTime&TreeMergeMark == TreeMergeMark {
  41. // merge mode - we have already updated in one of the branches
  42. return
  43. }
  44. for _, update := range file.updaters {
  45. update(currentTime, previousTime, delta)
  46. }
  47. }
  48. // NewFile initializes a new instance of File struct.
  49. //
  50. // time is the starting value of the first node;
  51. //
  52. // length is the starting length of the tree (the key of the second and the
  53. // last node);
  54. //
  55. // updaters are the attached interval length mappings.
  56. func NewFile(time int, length int, allocator *rbtree.Allocator, updaters ...Updater) *File {
  57. file := &File{tree: rbtree.NewRBTree(allocator), updaters: updaters}
  58. file.updateTime(time, time, length)
  59. if time < 0 || time > math.MaxUint32 {
  60. log.Panicf("time is out of allowed range: %d", time)
  61. }
  62. if length > math.MaxUint32 {
  63. log.Panicf("length is out of allowed range: %d", length)
  64. }
  65. if length > 0 {
  66. file.tree.Insert(rbtree.Item{Key: 0, Value: uint32(time)})
  67. }
  68. file.tree.Insert(rbtree.Item{Key: uint32(length), Value: TreeEnd})
  69. return file
  70. }
  71. // NewFileFromTree is an alternative constructor for File which is used in tests.
  72. // The resulting tree is validated with Validate() to ensure the initial integrity.
  73. //
  74. // keys is a slice with the starting tree keys.
  75. //
  76. // vals is a slice with the starting tree values. Must match the size of keys.
  77. //
  78. // updaters are the attached interval length mappings.
  79. func NewFileFromTree(keys []int, vals []int, allocator *rbtree.Allocator, updaters ...Updater) *File {
  80. file := &File{tree: rbtree.NewRBTree(allocator), updaters: updaters}
  81. if len(keys) != len(vals) {
  82. panic("keys and vals must be of equal length")
  83. }
  84. for i, key := range keys {
  85. val := vals[i]
  86. if key < 0 || key >= math.MaxUint32 {
  87. log.Panicf("key is out of allowed range: [%d]=%d", i, key)
  88. }
  89. if val < 0 || val > math.MaxUint32 {
  90. log.Panicf("val is out of allowed range: [%d]=%d", i, val)
  91. }
  92. file.tree.Insert(rbtree.Item{Key: uint32(key), Value: uint32(val)})
  93. }
  94. file.Validate()
  95. return file
  96. }
  97. // CloneShallow copies the file. It performs a shallow copy of the tree: the allocator
  98. // must be Clone()-d beforehand.
  99. func (file *File) CloneShallow(allocator *rbtree.Allocator) *File {
  100. return &File{tree: file.tree.CloneShallow(allocator), updaters: file.updaters}
  101. }
  102. // CloneDeep copies the file. It performs a deep copy of the tree.
  103. func (file *File) CloneDeep(allocator *rbtree.Allocator) *File {
  104. return &File{tree: file.tree.CloneDeep(allocator), updaters: file.updaters}
  105. }
  106. // Delete deallocates the file.
  107. func (file *File) Delete() {
  108. file.tree.Erase()
  109. }
  110. // Len returns the File's size - that is, the maximum key in the tree of line
  111. // intervals.
  112. func (file File) Len() int {
  113. return int(file.tree.Max().Item().Key)
  114. }
  115. // Nodes returns the number of RBTree nodes in the file.
  116. func (file File) Nodes() int {
  117. return file.tree.Len()
  118. }
  119. // Update modifies the underlying tree to adapt to the specified line changes.
  120. //
  121. // time is the time when the requested changes are made. Sets the values of the
  122. // inserted nodes.
  123. //
  124. // pos is the index of the line at which the changes are introduced.
  125. //
  126. // ins_length is the number of inserted lines after pos.
  127. //
  128. // del_length is the number of removed lines after pos. Deletions come before
  129. // the insertions.
  130. //
  131. // The code inside this function is probably the most important one throughout
  132. // the project. It is extensively covered with tests. If you find a bug, please
  133. // add the corresponding case in file_test.go.
  134. func (file *File) Update(time int, pos int, insLength int, delLength int) {
  135. if time < 0 {
  136. panic("time may not be negative")
  137. }
  138. if time >= math.MaxUint32 {
  139. panic("time may not be >= MaxUint32")
  140. }
  141. if pos < 0 {
  142. panic("attempt to insert/delete at a negative position")
  143. }
  144. if pos > math.MaxUint32 {
  145. panic("pos may not be > MaxUint32")
  146. }
  147. if insLength < 0 || delLength < 0 {
  148. panic("insLength and delLength must be non-negative")
  149. }
  150. if insLength|delLength == 0 {
  151. return
  152. }
  153. tree := file.tree
  154. if tree.Len() < 2 && tree.Min().Item().Key != 0 {
  155. panic("invalid tree state")
  156. }
  157. if uint32(pos) > tree.Max().Item().Key {
  158. panic(fmt.Sprintf("attempt to insert after the end of the file: %d < %d",
  159. tree.Max().Item().Key, pos))
  160. }
  161. iter := tree.FindLE(uint32(pos))
  162. origin := *iter.Item()
  163. prevOrigin := origin
  164. {
  165. prevIter := iter.Prev()
  166. if prevIter.Item() != nil {
  167. prevOrigin = *prevIter.Item()
  168. }
  169. }
  170. if insLength > 0 {
  171. file.updateTime(time, time, insLength)
  172. }
  173. if delLength == 0 {
  174. // simple case with insertions only
  175. if origin.Key < uint32(pos) || (origin.Value == uint32(time) && (pos == 0 || uint32(pos) == origin.Key)) {
  176. iter = iter.Next()
  177. }
  178. for ; !iter.Limit(); iter = iter.Next() {
  179. iter.Item().Key += uint32(insLength)
  180. }
  181. if origin.Value != uint32(time) {
  182. tree.Insert(rbtree.Item{Key: uint32(pos), Value: uint32(time)})
  183. if origin.Key < uint32(pos) {
  184. tree.Insert(rbtree.Item{Key: uint32(pos + insLength), Value: origin.Value})
  185. }
  186. }
  187. return
  188. }
  189. // delete nodes
  190. for true {
  191. node := iter.Item()
  192. nextIter := iter.Next()
  193. if nextIter.Limit() {
  194. if uint32(pos+delLength) > node.Key {
  195. panic("attempt to delete after the end of the file")
  196. }
  197. break
  198. }
  199. delta := internal.Min(int(nextIter.Item().Key), pos+delLength) - internal.Max(int(node.Key), pos)
  200. if delta == 0 && insLength == 0 && origin.Key == uint32(pos) && prevOrigin.Value == node.Value {
  201. origin = *node
  202. tree.DeleteWithIterator(iter)
  203. iter = nextIter
  204. }
  205. if delta <= 0 {
  206. break
  207. }
  208. file.updateTime(time, int(node.Value), -delta)
  209. if node.Key >= uint32(pos) {
  210. origin = *node
  211. tree.DeleteWithIterator(iter)
  212. }
  213. iter = nextIter
  214. }
  215. // prepare for the keys update
  216. var previous *rbtree.Item
  217. if insLength > 0 && (origin.Value != uint32(time) || origin.Key == uint32(pos)) {
  218. // insert our new interval
  219. if iter.Item().Value == uint32(time) && int(iter.Item().Key)-delLength == pos {
  220. prev := iter.Prev()
  221. if prev.NegativeLimit() || prev.Item().Value != uint32(time) {
  222. iter.Item().Key = uint32(pos)
  223. } else {
  224. tree.DeleteWithIterator(iter)
  225. iter = prev
  226. }
  227. origin.Value = uint32(time) // cancels the insertion after applying the delta
  228. } else {
  229. _, iter = tree.Insert(rbtree.Item{Key: uint32(pos), Value: uint32(time)})
  230. }
  231. } else {
  232. // rollback 1 position back, see "for true" deletion cycle ^
  233. iter = iter.Prev()
  234. previous = iter.Item()
  235. }
  236. // update the keys of all subsequent nodes
  237. delta := insLength - delLength
  238. if delta != 0 {
  239. for iter = iter.Next(); !iter.Limit(); iter = iter.Next() {
  240. // we do not need to re-balance the tree
  241. iter.Item().Key = uint32(int(iter.Item().Key) + delta)
  242. }
  243. // have to adjust origin in case insLength == 0
  244. if origin.Key > uint32(pos) {
  245. origin.Key = uint32(int(origin.Key) + delta)
  246. }
  247. }
  248. if insLength > 0 {
  249. if origin.Value != uint32(time) {
  250. tree.Insert(rbtree.Item{Key: uint32(pos + insLength), Value: origin.Value})
  251. } else if pos == 0 {
  252. // recover the beginning
  253. tree.Insert(rbtree.Item{Key: uint32(pos), Value: uint32(time)})
  254. }
  255. } else if (uint32(pos) > origin.Key && previous != nil && previous.Value != origin.Value) ||
  256. (uint32(pos) == origin.Key && origin.Value != prevOrigin.Value) ||
  257. pos == 0 {
  258. // continue the original interval
  259. tree.Insert(rbtree.Item{Key: uint32(pos), Value: origin.Value})
  260. }
  261. }
  262. // Merge combines several prepared File-s together.
  263. func (file *File) Merge(day int, others ...*File) {
  264. myself := file.flatten()
  265. for _, other := range others {
  266. if other == nil {
  267. log.Panic("merging with a nil file")
  268. }
  269. lines := other.flatten()
  270. if len(myself) != len(lines) {
  271. log.Panicf("file corruption, lines number mismatch during merge %d != %d",
  272. len(myself), len(lines))
  273. }
  274. for i, l := range myself {
  275. ol := lines[i]
  276. if ol&TreeMergeMark == TreeMergeMark {
  277. continue
  278. }
  279. if l&TreeMergeMark == TreeMergeMark || l&TreeMergeMark > ol&TreeMergeMark {
  280. // the line is merged in myself and exists in other
  281. // OR the same line introduced in different branches
  282. // consider the oldest version as the ground truth in that case
  283. myself[i] = ol
  284. continue
  285. }
  286. }
  287. }
  288. for i, l := range myself {
  289. if l&TreeMergeMark == TreeMergeMark {
  290. // original merge conflict resolution
  291. myself[i] = day
  292. file.updateTime(day, day, 1)
  293. }
  294. }
  295. // now we need to reconstruct the tree from the discrete values
  296. file.tree.Erase()
  297. tree := rbtree.NewRBTree(file.tree.Allocator())
  298. for i, v := range myself {
  299. if i == 0 || v != myself[i-1] {
  300. tree.Insert(rbtree.Item{Key: uint32(i), Value: uint32(v)})
  301. }
  302. }
  303. tree.Insert(rbtree.Item{Key: uint32(len(myself)), Value: TreeEnd})
  304. file.tree = tree
  305. }
  306. // Dump formats the underlying line interval tree into a string.
  307. // Useful for error messages, panic()-s and debugging.
  308. func (file File) Dump() string {
  309. buffer := ""
  310. file.ForEach(func(line, value int) {
  311. buffer += fmt.Sprintf("%d %d\n", line, value)
  312. })
  313. return buffer
  314. }
  315. // Validate checks the underlying line interval tree integrity.
  316. // The checks are as follows:
  317. //
  318. // 1. The minimum key must be 0 because the first line index is always 0.
  319. //
  320. // 2. The last node must carry TreeEnd value. This is the maintained invariant
  321. // which marks the ending of the last line interval.
  322. //
  323. // 3. Node keys must monotonically increase and never duplicate.
  324. func (file File) Validate() {
  325. if file.tree.Min().Item().Key != 0 {
  326. log.Panic("the tree must start with key 0")
  327. }
  328. if file.tree.Max().Item().Value != TreeEnd {
  329. log.Panicf("the last value in the tree must be %d", TreeEnd)
  330. }
  331. prevKey := uint32(math.MaxUint32)
  332. for iter := file.tree.Min(); !iter.Limit(); iter = iter.Next() {
  333. node := iter.Item()
  334. if node.Key == prevKey {
  335. log.Panicf("duplicate tree key: %d", node.Key)
  336. }
  337. if node.Value == TreeMergeMark {
  338. log.Panicf("unmerged lines left: %d", node.Key)
  339. }
  340. prevKey = node.Key
  341. }
  342. }
  343. // ForEach visits each node in the underlying tree, in ascending key order.
  344. func (file File) ForEach(callback func(line, value int)) {
  345. for iter := file.tree.Min(); !iter.Limit(); iter = iter.Next() {
  346. item := iter.Item()
  347. key := int(item.Key)
  348. var value int
  349. if item.Value == math.MaxUint32 {
  350. value = -1
  351. } else {
  352. value = int(item.Value)
  353. }
  354. callback(key, value)
  355. }
  356. }
  357. // flatten represents the file as a slice of lines, each line's value being the corresponding day.
  358. func (file *File) flatten() []int {
  359. lines := make([]int, 0, file.Len())
  360. val := uint32(math.MaxUint32)
  361. for iter := file.tree.Min(); !iter.Limit(); iter = iter.Next() {
  362. for i := uint32(len(lines)); i < iter.Item().Key; i++ {
  363. lines = append(lines, int(val))
  364. }
  365. val = iter.Item().Value
  366. }
  367. return lines
  368. }