file.go 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
  1. package hercules
  2. import "fmt"
  3. // A file encapsulates a balanced binary tree to store line intervals and
  4. // a cumulative mapping of values to the corresponding length counters. Users
  5. // are not supposed to create File-s directly; instead, they should call NewFile().
  6. // NewFileFromTree() is the special constructor which is useful in the tests.
  7. //
  8. // Len() returns the number of lines in File.
  9. //
  10. // Update() mutates File by introducing tree structural changes and updating the
  11. // length mapping.
  12. //
  13. // Dump() writes the tree to a string and Validate() checks the tree integrity.
  14. type File struct {
  15. tree *RBTree
  16. statuses []map[int]int64
  17. }
  18. // TreeEnd denotes the value of the last leaf in the tree.
  19. const TreeEnd int = -1
  20. // The ugly side of Go.
  21. // template <typename T> please!
  22. // min calculates the minimum of two 32-bit integers.
  23. func min(a int, b int) int {
  24. if a < b {
  25. return a
  26. }
  27. return b
  28. }
  29. // min64 calculates the minimum of two 64-bit integers.
  30. func min64(a int64, b int64) int64 {
  31. if a < b {
  32. return a
  33. }
  34. return b
  35. }
  36. // max calculates the maximum of two 32-bit integers.
  37. func max(a int, b int) int {
  38. if a < b {
  39. return b
  40. }
  41. return a
  42. }
  43. // max64 calculates the maximum of two 64-bit integers.
  44. func max64(a int64, b int64) int64 {
  45. if a < b {
  46. return b
  47. }
  48. return a
  49. }
  50. // abs64 calculates the absolute value of a 64-bit integer.
  51. func abs64(v int64) int64 {
  52. if v <= 0 {
  53. return -v
  54. }
  55. return v
  56. }
  57. func (file *File) updateTime(time int, delta int) {
  58. for _, status := range file.statuses {
  59. status[time] += int64(delta)
  60. }
  61. }
  62. // NewFile initializes a new instance of File struct.
  63. //
  64. // time is the starting value of the first node;
  65. //
  66. // length is the starting length of the tree (the key of the second and the
  67. // last node);
  68. //
  69. // statuses are the attached interval length mappings.
  70. func NewFile(time int, length int, statuses ...map[int]int64) *File {
  71. file := new(File)
  72. file.statuses = statuses
  73. file.tree = new(RBTree)
  74. if length > 0 {
  75. file.updateTime(time, length)
  76. file.tree.Insert(Item{key: 0, value: time})
  77. }
  78. file.tree.Insert(Item{key: length, value: TreeEnd})
  79. return file
  80. }
  81. // NewFileFromTree is an alternative constructor for File which is used in tests.
  82. // The resulting tree is validated with Validate() to ensure the initial integrity.
  83. //
  84. // keys is a slice with the starting tree keys.
  85. //
  86. // vals is a slice with the starting tree values. Must match the size of keys.
  87. //
  88. // statuses are the attached interval length mappings.
  89. func NewFileFromTree(keys []int, vals []int, statuses ...map[int]int64) *File {
  90. file := new(File)
  91. file.statuses = statuses
  92. file.tree = new(RBTree)
  93. if len(keys) != len(vals) {
  94. panic("keys and vals must be of equal length")
  95. }
  96. for i := 0; i < len(keys); i++ {
  97. file.tree.Insert(Item{key: keys[i], value: vals[i]})
  98. }
  99. file.Validate()
  100. return file
  101. }
  102. // Len returns the File's size - that is, the maximum key in the tree of line
  103. // intervals.
  104. func (file *File) Len() int {
  105. return file.tree.Max().Item().key
  106. }
  107. // Update modifies the underlying tree to adapt to the specified line changes.
  108. //
  109. // time is the time when the requested changes are made. Sets the values of the
  110. // inserted nodes.
  111. //
  112. // pos is the index of the line at which the changes are introduced.
  113. //
  114. // ins_length is the number of inserted lines after pos.
  115. //
  116. // del_length is the number of removed lines after pos. Deletions come before
  117. // the insertions.
  118. //
  119. // The code inside this function is probably the most important one throughout
  120. // the project. It is extensively covered with tests. If you find a bug, please
  121. // add the corresponding case in file_test.go.
  122. func (file *File) Update(time int, pos int, ins_length int, del_length int) {
  123. if time < 0 {
  124. panic("time may not be negative")
  125. }
  126. if pos < 0 {
  127. panic("attempt to insert/delete at a negative position")
  128. }
  129. if ins_length < 0 || del_length < 0 {
  130. panic("ins_length and del_length must be nonnegative")
  131. }
  132. if ins_length|del_length == 0 {
  133. return
  134. }
  135. tree := file.tree
  136. if pos > tree.Max().Item().key {
  137. panic(fmt.Sprintf("attempt to insert after the end of the file: %d < %d",
  138. tree.Max().Item().key, pos))
  139. }
  140. if tree.Len() < 2 && tree.Min().Item().key != 0 {
  141. panic("invalid tree state")
  142. }
  143. iter := tree.FindLE(pos)
  144. origin := *iter.Item()
  145. file.updateTime(time, ins_length)
  146. if del_length == 0 {
  147. // simple case with insertions only
  148. if origin.key < pos || (origin.value == time && pos == 0) {
  149. iter = iter.Next()
  150. }
  151. for ; !iter.Limit(); iter = iter.Next() {
  152. iter.Item().key += ins_length
  153. }
  154. if origin.value != time {
  155. tree.Insert(Item{key: pos, value: time})
  156. if origin.key < pos {
  157. tree.Insert(Item{key: pos + ins_length, value: origin.value})
  158. }
  159. }
  160. return
  161. }
  162. // delete nodes
  163. for true {
  164. node := iter.Item()
  165. next_iter := iter.Next()
  166. if next_iter.Limit() {
  167. if pos+del_length > node.key {
  168. panic("attempt to delete after the end of the file")
  169. }
  170. break
  171. }
  172. delta := min(next_iter.Item().key, pos+del_length) - max(node.key, pos)
  173. if delta <= 0 {
  174. break
  175. }
  176. file.updateTime(node.value, -delta)
  177. if node.key >= pos {
  178. origin = *node
  179. tree.DeleteWithIterator(iter)
  180. }
  181. iter = next_iter
  182. }
  183. // prepare for the keys update
  184. var previous *Item
  185. if ins_length > 0 && (origin.value != time || origin.key == pos) {
  186. // insert our new interval
  187. if iter.Item().value == time {
  188. prev := iter.Prev()
  189. if prev.Item().value != time {
  190. iter.Item().key = pos
  191. } else {
  192. tree.DeleteWithIterator(iter)
  193. iter = prev
  194. }
  195. origin.value = time // cancels the insertion after applying the delta
  196. } else {
  197. _, iter = tree.Insert(Item{key: pos, value: time})
  198. }
  199. } else {
  200. // rollback 1 position back, see "for true" deletion cycle ^
  201. iter = iter.Prev()
  202. previous = iter.Item()
  203. }
  204. // update the keys of all subsequent nodes
  205. delta := ins_length - del_length
  206. if delta != 0 {
  207. for iter = iter.Next(); !iter.Limit(); iter = iter.Next() {
  208. // we do not need to re-balance the tree
  209. iter.Item().key += delta
  210. }
  211. // have to adjust origin in case ins_length == 0
  212. if origin.key > pos {
  213. origin.key += delta
  214. }
  215. }
  216. if ins_length > 0 {
  217. if origin.value != time {
  218. tree.Insert(Item{pos + ins_length, origin.value})
  219. } else if pos == 0 {
  220. // recover the beginning
  221. tree.Insert(Item{pos, time})
  222. }
  223. } else if (pos > origin.key && previous.value != origin.value) || pos == origin.key || pos == 0 {
  224. // continue the original interval
  225. tree.Insert(Item{pos, origin.value})
  226. }
  227. }
  228. func (file *File) Status(index int) map[int]int64 {
  229. if index < 0 || index >= len(file.statuses) {
  230. panic(fmt.Sprintf("status index %d is out of bounds [0, %d)",
  231. index, len(file.statuses)))
  232. }
  233. return file.statuses[index]
  234. }
  235. // Dump formats the underlying line interval tree into a string.
  236. // Useful for error messages, panic()-s and debugging.
  237. func (file *File) Dump() string {
  238. buffer := ""
  239. for iter := file.tree.Min(); !iter.Limit(); iter = iter.Next() {
  240. node := iter.Item()
  241. buffer += fmt.Sprintf("%d %d\n", node.key, node.value)
  242. }
  243. return buffer
  244. }
  245. // Validate checks the underlying line interval tree integrity.
  246. // The checks are as follows:
  247. //
  248. // 1. The minimum key must be 0 because the first line index is always 0.
  249. //
  250. // 2. The last node must carry TreeEnd value. This is the maintained invariant
  251. // which marks the ending of the last line interval.
  252. //
  253. // 3. Node keys must monotonically increase and never duplicate.
  254. func (file *File) Validate() {
  255. if file.tree.Min().Item().key != 0 {
  256. panic("the tree must start with key 0")
  257. }
  258. if file.tree.Max().Item().value != TreeEnd {
  259. panic(fmt.Sprintf("the last value in the tree must be %d", TreeEnd))
  260. }
  261. prev_key := -1
  262. for iter := file.tree.Min(); !iter.Limit(); iter = iter.Next() {
  263. node := iter.Item()
  264. if node.key == prev_key {
  265. panic(fmt.Sprintf("duplicate tree key: %d", node.key))
  266. }
  267. prev_key = node.key
  268. }
  269. }