file.go 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301
  1. package hercules
  2. import "fmt"
  3. // A status is the something we would like to update during File.Update().
  4. type Status struct {
  5. data interface{}
  6. update func(interface{}, int, int, int)
  7. }
  8. // A file encapsulates a balanced binary tree to store line intervals and
  9. // a cumulative mapping of values to the corresponding length counters. Users
  10. // are not supposed to create File-s directly; instead, they should call NewFile().
  11. // NewFileFromTree() is the special constructor which is useful in the tests.
  12. //
  13. // Len() returns the number of lines in File.
  14. //
  15. // Update() mutates File by introducing tree structural changes and updating the
  16. // length mapping.
  17. //
  18. // Dump() writes the tree to a string and Validate() checks the tree integrity.
  19. type File struct {
  20. tree *RBTree
  21. statuses []Status
  22. }
  23. func NewStatus(data interface{}, update func(interface{}, int, int, int)) Status {
  24. return Status{data: data, update: update}
  25. }
  26. // TreeEnd denotes the value of the last leaf in the tree.
  27. const TreeEnd int = -1
  28. // The ugly side of Go.
  29. // template <typename T> please!
  30. // min calculates the minimum of two 32-bit integers.
  31. func min(a int, b int) int {
  32. if a < b {
  33. return a
  34. }
  35. return b
  36. }
  37. // min64 calculates the minimum of two 64-bit integers.
  38. func min64(a int64, b int64) int64 {
  39. if a < b {
  40. return a
  41. }
  42. return b
  43. }
  44. // max calculates the maximum of two 32-bit integers.
  45. func max(a int, b int) int {
  46. if a < b {
  47. return b
  48. }
  49. return a
  50. }
  51. // max64 calculates the maximum of two 64-bit integers.
  52. func max64(a int64, b int64) int64 {
  53. if a < b {
  54. return b
  55. }
  56. return a
  57. }
  58. // abs64 calculates the absolute value of a 64-bit integer.
  59. func abs64(v int64) int64 {
  60. if v <= 0 {
  61. return -v
  62. }
  63. return v
  64. }
  65. func (file *File) updateTime(current_time int, previous_time int, delta int) {
  66. for _, status := range file.statuses {
  67. status.update(status.data, current_time, previous_time, delta)
  68. }
  69. }
  70. // NewFile initializes a new instance of File struct.
  71. //
  72. // time is the starting value of the first node;
  73. //
  74. // length is the starting length of the tree (the key of the second and the
  75. // last node);
  76. //
  77. // statuses are the attached interval length mappings.
  78. func NewFile(time int, length int, statuses ...Status) *File {
  79. file := new(File)
  80. file.statuses = statuses
  81. file.tree = new(RBTree)
  82. if length > 0 {
  83. file.updateTime(time, time, length)
  84. file.tree.Insert(Item{key: 0, value: time})
  85. }
  86. file.tree.Insert(Item{key: length, value: TreeEnd})
  87. return file
  88. }
  89. // NewFileFromTree is an alternative constructor for File which is used in tests.
  90. // The resulting tree is validated with Validate() to ensure the initial integrity.
  91. //
  92. // keys is a slice with the starting tree keys.
  93. //
  94. // vals is a slice with the starting tree values. Must match the size of keys.
  95. //
  96. // statuses are the attached interval length mappings.
  97. func NewFileFromTree(keys []int, vals []int, statuses ...Status) *File {
  98. file := new(File)
  99. file.statuses = statuses
  100. file.tree = new(RBTree)
  101. if len(keys) != len(vals) {
  102. panic("keys and vals must be of equal length")
  103. }
  104. for i := 0; i < len(keys); i++ {
  105. file.tree.Insert(Item{key: keys[i], value: vals[i]})
  106. }
  107. file.Validate()
  108. return file
  109. }
  110. // Len returns the File's size - that is, the maximum key in the tree of line
  111. // intervals.
  112. func (file *File) Len() int {
  113. return file.tree.Max().Item().key
  114. }
  115. // Update modifies the underlying tree to adapt to the specified line changes.
  116. //
  117. // time is the time when the requested changes are made. Sets the values of the
  118. // inserted nodes.
  119. //
  120. // pos is the index of the line at which the changes are introduced.
  121. //
  122. // ins_length is the number of inserted lines after pos.
  123. //
  124. // del_length is the number of removed lines after pos. Deletions come before
  125. // the insertions.
  126. //
  127. // The code inside this function is probably the most important one throughout
  128. // the project. It is extensively covered with tests. If you find a bug, please
  129. // add the corresponding case in file_test.go.
  130. func (file *File) Update(time int, pos int, ins_length int, del_length int) {
  131. if time < 0 {
  132. panic("time may not be negative")
  133. }
  134. if pos < 0 {
  135. panic("attempt to insert/delete at a negative position")
  136. }
  137. if ins_length < 0 || del_length < 0 {
  138. panic("ins_length and del_length must be nonnegative")
  139. }
  140. if ins_length|del_length == 0 {
  141. return
  142. }
  143. tree := file.tree
  144. if pos > tree.Max().Item().key {
  145. panic(fmt.Sprintf("attempt to insert after the end of the file: %d < %d",
  146. tree.Max().Item().key, pos))
  147. }
  148. if tree.Len() < 2 && tree.Min().Item().key != 0 {
  149. panic("invalid tree state")
  150. }
  151. iter := tree.FindLE(pos)
  152. origin := *iter.Item()
  153. file.updateTime(time, time, ins_length)
  154. if del_length == 0 {
  155. // simple case with insertions only
  156. if origin.key < pos || (origin.value == time && pos == 0) {
  157. iter = iter.Next()
  158. }
  159. for ; !iter.Limit(); iter = iter.Next() {
  160. iter.Item().key += ins_length
  161. }
  162. if origin.value != time {
  163. tree.Insert(Item{key: pos, value: time})
  164. if origin.key < pos {
  165. tree.Insert(Item{key: pos + ins_length, value: origin.value})
  166. }
  167. }
  168. return
  169. }
  170. // delete nodes
  171. for true {
  172. node := iter.Item()
  173. next_iter := iter.Next()
  174. if next_iter.Limit() {
  175. if pos+del_length > node.key {
  176. panic("attempt to delete after the end of the file")
  177. }
  178. break
  179. }
  180. delta := min(next_iter.Item().key, pos+del_length) - max(node.key, pos)
  181. if delta <= 0 {
  182. break
  183. }
  184. file.updateTime(time, node.value, -delta)
  185. if node.key >= pos {
  186. origin = *node
  187. tree.DeleteWithIterator(iter)
  188. }
  189. iter = next_iter
  190. }
  191. // prepare for the keys update
  192. var previous *Item
  193. if ins_length > 0 && (origin.value != time || origin.key == pos) {
  194. // insert our new interval
  195. if iter.Item().value == time {
  196. prev := iter.Prev()
  197. if prev.Item().value != time {
  198. iter.Item().key = pos
  199. } else {
  200. tree.DeleteWithIterator(iter)
  201. iter = prev
  202. }
  203. origin.value = time // cancels the insertion after applying the delta
  204. } else {
  205. _, iter = tree.Insert(Item{key: pos, value: time})
  206. }
  207. } else {
  208. // rollback 1 position back, see "for true" deletion cycle ^
  209. iter = iter.Prev()
  210. previous = iter.Item()
  211. }
  212. // update the keys of all subsequent nodes
  213. delta := ins_length - del_length
  214. if delta != 0 {
  215. for iter = iter.Next(); !iter.Limit(); iter = iter.Next() {
  216. // we do not need to re-balance the tree
  217. iter.Item().key += delta
  218. }
  219. // have to adjust origin in case ins_length == 0
  220. if origin.key > pos {
  221. origin.key += delta
  222. }
  223. }
  224. if ins_length > 0 {
  225. if origin.value != time {
  226. tree.Insert(Item{pos + ins_length, origin.value})
  227. } else if pos == 0 {
  228. // recover the beginning
  229. tree.Insert(Item{pos, time})
  230. }
  231. } else if (pos > origin.key && previous.value != origin.value) || pos == origin.key || pos == 0 {
  232. // continue the original interval
  233. tree.Insert(Item{pos, origin.value})
  234. }
  235. }
  236. func (file *File) Status(index int) interface{} {
  237. if index < 0 || index >= len(file.statuses) {
  238. panic(fmt.Sprintf("status index %d is out of bounds [0, %d)",
  239. index, len(file.statuses)))
  240. }
  241. return file.statuses[index].data
  242. }
  243. // Dump formats the underlying line interval tree into a string.
  244. // Useful for error messages, panic()-s and debugging.
  245. func (file *File) Dump() string {
  246. buffer := ""
  247. for iter := file.tree.Min(); !iter.Limit(); iter = iter.Next() {
  248. node := iter.Item()
  249. buffer += fmt.Sprintf("%d %d\n", node.key, node.value)
  250. }
  251. return buffer
  252. }
  253. // Validate checks the underlying line interval tree integrity.
  254. // The checks are as follows:
  255. //
  256. // 1. The minimum key must be 0 because the first line index is always 0.
  257. //
  258. // 2. The last node must carry TreeEnd value. This is the maintained invariant
  259. // which marks the ending of the last line interval.
  260. //
  261. // 3. Node keys must monotonically increase and never duplicate.
  262. func (file *File) Validate() {
  263. if file.tree.Min().Item().key != 0 {
  264. panic("the tree must start with key 0")
  265. }
  266. if file.tree.Max().Item().value != TreeEnd {
  267. panic(fmt.Sprintf("the last value in the tree must be %d", TreeEnd))
  268. }
  269. prev_key := -1
  270. for iter := file.tree.Min(); !iter.Limit(); iter = iter.Next() {
  271. node := iter.Item()
  272. if node.key == prev_key {
  273. panic(fmt.Sprintf("duplicate tree key: %d", node.key))
  274. }
  275. prev_key = node.key
  276. }
  277. }