file.go 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306
  1. package hercules
  2. import (
  3. "fmt"
  4. "gopkg.in/src-d/hercules.v3/rbtree"
  5. )
  6. // Status is the something we would like to keep track of in File.Update().
  7. type Status struct {
  8. data interface{}
  9. update func(interface{}, int, int, int)
  10. }
  11. // File encapsulates a balanced binary tree to store line intervals and
  12. // a cumulative mapping of values to the corresponding length counters. Users
  13. // are not supposed to create File-s directly; instead, they should call NewFile().
  14. // NewFileFromTree() is the special constructor which is useful in the tests.
  15. //
  16. // Len() returns the number of lines in File.
  17. //
  18. // Update() mutates File by introducing tree structural changes and updating the
  19. // length mapping.
  20. //
  21. // Dump() writes the tree to a string and Validate() checks the tree integrity.
  22. type File struct {
  23. tree *rbtree.RBTree
  24. statuses []Status
  25. }
  26. // NewStatus initializes a new instance of Status struct. It is needed to set the only two
  27. // private fields which are not supposed to be replaced during the whole lifetime.
  28. func NewStatus(data interface{}, update func(interface{}, int, int, int)) Status {
  29. return Status{data: data, update: update}
  30. }
  31. // TreeEnd denotes the value of the last leaf in the tree.
  32. const TreeEnd int = -1
  33. // The ugly side of Go.
  34. // template <typename T> please!
  35. // min calculates the minimum of two 32-bit integers.
  36. func min(a int, b int) int {
  37. if a < b {
  38. return a
  39. }
  40. return b
  41. }
  42. // min64 calculates the minimum of two 64-bit integers.
  43. func min64(a int64, b int64) int64 {
  44. if a < b {
  45. return a
  46. }
  47. return b
  48. }
  49. // max calculates the maximum of two 32-bit integers.
  50. func max(a int, b int) int {
  51. if a < b {
  52. return b
  53. }
  54. return a
  55. }
  56. // max64 calculates the maximum of two 64-bit integers.
  57. func max64(a int64, b int64) int64 {
  58. if a < b {
  59. return b
  60. }
  61. return a
  62. }
  63. // abs64 calculates the absolute value of a 64-bit integer.
  64. func abs64(v int64) int64 {
  65. if v <= 0 {
  66. return -v
  67. }
  68. return v
  69. }
  70. func (file *File) updateTime(currentTime int, previousTime int, delta int) {
  71. for _, status := range file.statuses {
  72. status.update(status.data, currentTime, previousTime, delta)
  73. }
  74. }
  75. // NewFile initializes a new instance of File struct.
  76. //
  77. // time is the starting value of the first node;
  78. //
  79. // length is the starting length of the tree (the key of the second and the
  80. // last node);
  81. //
  82. // statuses are the attached interval length mappings.
  83. func NewFile(time int, length int, statuses ...Status) *File {
  84. file := new(File)
  85. file.statuses = statuses
  86. file.tree = new(rbtree.RBTree)
  87. if length > 0 {
  88. file.updateTime(time, time, length)
  89. file.tree.Insert(rbtree.Item{Key: 0, Value: time})
  90. }
  91. file.tree.Insert(rbtree.Item{Key: length, Value: TreeEnd})
  92. return file
  93. }
  94. // NewFileFromTree is an alternative constructor for File which is used in tests.
  95. // The resulting tree is validated with Validate() to ensure the initial integrity.
  96. //
  97. // keys is a slice with the starting tree keys.
  98. //
  99. // vals is a slice with the starting tree values. Must match the size of keys.
  100. //
  101. // statuses are the attached interval length mappings.
  102. func NewFileFromTree(keys []int, vals []int, statuses ...Status) *File {
  103. file := new(File)
  104. file.statuses = statuses
  105. file.tree = new(rbtree.RBTree)
  106. if len(keys) != len(vals) {
  107. panic("keys and vals must be of equal length")
  108. }
  109. for i := 0; i < len(keys); i++ {
  110. file.tree.Insert(rbtree.Item{Key: keys[i], Value: vals[i]})
  111. }
  112. file.Validate()
  113. return file
  114. }
  115. // Len returns the File's size - that is, the maximum key in the tree of line
  116. // intervals.
  117. func (file *File) Len() int {
  118. return file.tree.Max().Item().Key
  119. }
  120. // Update modifies the underlying tree to adapt to the specified line changes.
  121. //
  122. // time is the time when the requested changes are made. Sets the values of the
  123. // inserted nodes.
  124. //
  125. // pos is the index of the line at which the changes are introduced.
  126. //
  127. // ins_length is the number of inserted lines after pos.
  128. //
  129. // del_length is the number of removed lines after pos. Deletions come before
  130. // the insertions.
  131. //
  132. // The code inside this function is probably the most important one throughout
  133. // the project. It is extensively covered with tests. If you find a bug, please
  134. // add the corresponding case in file_test.go.
  135. func (file *File) Update(time int, pos int, insLength int, delLength int) {
  136. if time < 0 {
  137. panic("time may not be negative")
  138. }
  139. if pos < 0 {
  140. panic("attempt to insert/delete at a negative position")
  141. }
  142. if insLength < 0 || delLength < 0 {
  143. panic("insLength and delLength must be non-negative")
  144. }
  145. if insLength|delLength == 0 {
  146. return
  147. }
  148. tree := file.tree
  149. if tree.Len() < 2 && tree.Min().Item().Key != 0 {
  150. panic("invalid tree state")
  151. }
  152. if pos > tree.Max().Item().Key {
  153. panic(fmt.Sprintf("attempt to insert after the end of the file: %d < %d",
  154. tree.Max().Item().Key, pos))
  155. }
  156. iter := tree.FindLE(pos)
  157. origin := *iter.Item()
  158. file.updateTime(time, time, insLength)
  159. if delLength == 0 {
  160. // simple case with insertions only
  161. if origin.Key < pos || (origin.Value == time && pos == 0) {
  162. iter = iter.Next()
  163. }
  164. for ; !iter.Limit(); iter = iter.Next() {
  165. iter.Item().Key += insLength
  166. }
  167. if origin.Value != time {
  168. tree.Insert(rbtree.Item{Key: pos, Value: time})
  169. if origin.Key < pos {
  170. tree.Insert(rbtree.Item{Key: pos + insLength, Value: origin.Value})
  171. }
  172. }
  173. return
  174. }
  175. // delete nodes
  176. for true {
  177. node := iter.Item()
  178. nextIter := iter.Next()
  179. if nextIter.Limit() {
  180. if pos+delLength > node.Key {
  181. panic("attempt to delete after the end of the file")
  182. }
  183. break
  184. }
  185. delta := min(nextIter.Item().Key, pos+delLength) - max(node.Key, pos)
  186. if delta <= 0 {
  187. break
  188. }
  189. file.updateTime(time, node.Value, -delta)
  190. if node.Key >= pos {
  191. origin = *node
  192. tree.DeleteWithIterator(iter)
  193. }
  194. iter = nextIter
  195. }
  196. // prepare for the keys update
  197. var previous *rbtree.Item
  198. if insLength > 0 && (origin.Value != time || origin.Key == pos) {
  199. // insert our new interval
  200. if iter.Item().Value == time {
  201. prev := iter.Prev()
  202. if prev.Item().Value != time {
  203. iter.Item().Key = pos
  204. } else {
  205. tree.DeleteWithIterator(iter)
  206. iter = prev
  207. }
  208. origin.Value = time // cancels the insertion after applying the delta
  209. } else {
  210. _, iter = tree.Insert(rbtree.Item{Key: pos, Value: time})
  211. }
  212. } else {
  213. // rollback 1 position back, see "for true" deletion cycle ^
  214. iter = iter.Prev()
  215. previous = iter.Item()
  216. }
  217. // update the keys of all subsequent nodes
  218. delta := insLength - delLength
  219. if delta != 0 {
  220. for iter = iter.Next(); !iter.Limit(); iter = iter.Next() {
  221. // we do not need to re-balance the tree
  222. iter.Item().Key += delta
  223. }
  224. // have to adjust origin in case insLength == 0
  225. if origin.Key > pos {
  226. origin.Key += delta
  227. }
  228. }
  229. if insLength > 0 {
  230. if origin.Value != time {
  231. tree.Insert(rbtree.Item{Key: pos + insLength, Value: origin.Value})
  232. } else if pos == 0 {
  233. // recover the beginning
  234. tree.Insert(rbtree.Item{Key: pos, Value: time})
  235. }
  236. } else if (pos > origin.Key && previous.Value != origin.Value) || pos == origin.Key || pos == 0 {
  237. // continue the original interval
  238. tree.Insert(rbtree.Item{Key: pos, Value: origin.Value})
  239. }
  240. }
  241. func (file *File) Status(index int) interface{} {
  242. if index < 0 || index >= len(file.statuses) {
  243. panic(fmt.Sprintf("status index %d is out of bounds [0, %d)",
  244. index, len(file.statuses)))
  245. }
  246. return file.statuses[index].data
  247. }
  248. // Dump formats the underlying line interval tree into a string.
  249. // Useful for error messages, panic()-s and debugging.
  250. func (file *File) Dump() string {
  251. buffer := ""
  252. for iter := file.tree.Min(); !iter.Limit(); iter = iter.Next() {
  253. node := iter.Item()
  254. buffer += fmt.Sprintf("%d %d\n", node.Key, node.Value)
  255. }
  256. return buffer
  257. }
  258. // Validate checks the underlying line interval tree integrity.
  259. // The checks are as follows:
  260. //
  261. // 1. The minimum key must be 0 because the first line index is always 0.
  262. //
  263. // 2. The last node must carry TreeEnd value. This is the maintained invariant
  264. // which marks the ending of the last line interval.
  265. //
  266. // 3. Node keys must monotonically increase and never duplicate.
  267. func (file *File) Validate() {
  268. if file.tree.Min().Item().Key != 0 {
  269. panic("the tree must start with key 0")
  270. }
  271. if file.tree.Max().Item().Value != TreeEnd {
  272. panic(fmt.Sprintf("the last value in the tree must be %d", TreeEnd))
  273. }
  274. prevKey := -1
  275. for iter := file.tree.Min(); !iter.Limit(); iter = iter.Next() {
  276. node := iter.Item()
  277. if node.Key == prevKey {
  278. panic(fmt.Sprintf("duplicate tree key: %d", node.Key))
  279. }
  280. prevKey = node.Key
  281. }
  282. }