file.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340
  1. package burndown
  2. import (
  3. "fmt"
  4. "log"
  5. "gopkg.in/src-d/hercules.v4/internal"
  6. "gopkg.in/src-d/hercules.v4/internal/rbtree"
  7. )
  8. // Updater is the function which is called back on File.Update().
  9. type Updater = func(currentTime, previousTime, delta int)
  10. // File encapsulates a balanced binary tree to store line intervals and
  11. // a cumulative mapping of values to the corresponding length counters. Users
  12. // are not supposed to create File-s directly; instead, they should call NewFile().
  13. // NewFileFromTree() is the special constructor which is useful in the tests.
  14. //
  15. // Len() returns the number of lines in File.
  16. //
  17. // Update() mutates File by introducing tree structural changes and updating the
  18. // length mapping.
  19. //
  20. // Dump() writes the tree to a string and Validate() checks the tree integrity.
  21. type File struct {
  22. tree *rbtree.RBTree
  23. updaters []Updater
  24. }
  25. // TreeEnd denotes the value of the last leaf in the tree.
  26. const TreeEnd = -1
  27. // TreeMaxBinPower is the binary power value which corresponds to the maximum day which
  28. // can be stored in the tree.
  29. const TreeMaxBinPower = 14
  30. // TreeMergeMark is the special day which disables the status updates and is used in File.Merge().
  31. const TreeMergeMark = (1 << TreeMaxBinPower) - 1
  32. func (file *File) updateTime(currentTime, previousTime, delta int) {
  33. if previousTime & TreeMergeMark == TreeMergeMark {
  34. if currentTime == previousTime {
  35. return
  36. }
  37. panic("previousTime cannot be TreeMergeMark")
  38. }
  39. if currentTime & TreeMergeMark == TreeMergeMark {
  40. // merge mode - `delta` is negative and we have already applied it in a branch
  41. return
  42. }
  43. for _, update := range file.updaters {
  44. update(currentTime, previousTime, delta)
  45. }
  46. }
  47. // NewFile initializes a new instance of File struct.
  48. //
  49. // time is the starting value of the first node;
  50. //
  51. // length is the starting length of the tree (the key of the second and the
  52. // last node);
  53. //
  54. // updaters are the attached interval length mappings.
  55. func NewFile(time int, length int, updaters ...Updater) *File {
  56. file := &File{tree: new(rbtree.RBTree), updaters: updaters}
  57. file.updateTime(time, time, length)
  58. if length > 0 {
  59. file.tree.Insert(rbtree.Item{Key: 0, Value: time})
  60. }
  61. file.tree.Insert(rbtree.Item{Key: length, Value: TreeEnd})
  62. return file
  63. }
  64. // NewFileFromTree is an alternative constructor for File which is used in tests.
  65. // The resulting tree is validated with Validate() to ensure the initial integrity.
  66. //
  67. // keys is a slice with the starting tree keys.
  68. //
  69. // vals is a slice with the starting tree values. Must match the size of keys.
  70. //
  71. // updaters are the attached interval length mappings.
  72. func NewFileFromTree(keys []int, vals []int, updaters ...Updater) *File {
  73. file := &File{tree: new(rbtree.RBTree), updaters: updaters}
  74. if len(keys) != len(vals) {
  75. panic("keys and vals must be of equal length")
  76. }
  77. for i := 0; i < len(keys); i++ {
  78. file.tree.Insert(rbtree.Item{Key: keys[i], Value: vals[i]})
  79. }
  80. file.Validate()
  81. return file
  82. }
  83. // Clone copies the file. It performs a deep copy of the tree;
  84. // depending on `clearStatuses` the original updaters are removed or not.
  85. // Any new `updaters` are appended.
  86. func (file *File) Clone(clearStatuses bool, updaters ...Updater) *File {
  87. clone := &File{tree: file.tree.Clone(), updaters: file.updaters}
  88. if clearStatuses {
  89. clone.updaters = []Updater{}
  90. }
  91. for _, updater := range updaters {
  92. clone.updaters = append(clone.updaters, updater)
  93. }
  94. return clone
  95. }
  96. // Len returns the File's size - that is, the maximum key in the tree of line
  97. // intervals.
  98. func (file *File) Len() int {
  99. return file.tree.Max().Item().Key
  100. }
  101. // Update modifies the underlying tree to adapt to the specified line changes.
  102. //
  103. // time is the time when the requested changes are made. Sets the values of the
  104. // inserted nodes.
  105. //
  106. // pos is the index of the line at which the changes are introduced.
  107. //
  108. // ins_length is the number of inserted lines after pos.
  109. //
  110. // del_length is the number of removed lines after pos. Deletions come before
  111. // the insertions.
  112. //
  113. // The code inside this function is probably the most important one throughout
  114. // the project. It is extensively covered with tests. If you find a bug, please
  115. // add the corresponding case in file_test.go.
  116. func (file *File) Update(time int, pos int, insLength int, delLength int) {
  117. if time < 0 {
  118. panic("time may not be negative")
  119. }
  120. if pos < 0 {
  121. panic("attempt to insert/delete at a negative position")
  122. }
  123. if insLength < 0 || delLength < 0 {
  124. panic("insLength and delLength must be non-negative")
  125. }
  126. if insLength|delLength == 0 {
  127. return
  128. }
  129. tree := file.tree
  130. if tree.Len() < 2 && tree.Min().Item().Key != 0 {
  131. panic("invalid tree state")
  132. }
  133. if pos > tree.Max().Item().Key {
  134. panic(fmt.Sprintf("attempt to insert after the end of the file: %d < %d",
  135. tree.Max().Item().Key, pos))
  136. }
  137. iter := tree.FindLE(pos)
  138. origin := *iter.Item()
  139. if insLength > 0 {
  140. file.updateTime(time, time, insLength)
  141. }
  142. if delLength == 0 {
  143. // simple case with insertions only
  144. if origin.Key < pos || (origin.Value == time && (pos == 0 || pos == origin.Key)) {
  145. iter = iter.Next()
  146. }
  147. for ; !iter.Limit(); iter = iter.Next() {
  148. iter.Item().Key += insLength
  149. }
  150. if origin.Value != time {
  151. tree.Insert(rbtree.Item{Key: pos, Value: time})
  152. if origin.Key < pos {
  153. tree.Insert(rbtree.Item{Key: pos + insLength, Value: origin.Value})
  154. }
  155. }
  156. return
  157. }
  158. // delete nodes
  159. for true {
  160. node := iter.Item()
  161. nextIter := iter.Next()
  162. if nextIter.Limit() {
  163. if pos+delLength > node.Key {
  164. panic("attempt to delete after the end of the file")
  165. }
  166. break
  167. }
  168. delta := internal.Min(nextIter.Item().Key, pos+delLength) - internal.Max(node.Key, pos)
  169. if delta <= 0 {
  170. break
  171. }
  172. file.updateTime(time, node.Value, -delta)
  173. if node.Key >= pos {
  174. origin = *node
  175. tree.DeleteWithIterator(iter)
  176. }
  177. iter = nextIter
  178. }
  179. // prepare for the keys update
  180. var previous *rbtree.Item
  181. if insLength > 0 && (origin.Value != time || origin.Key == pos) {
  182. // insert our new interval
  183. if iter.Item().Value == time && iter.Item().Key - delLength == pos {
  184. prev := iter.Prev()
  185. if prev.Item().Value != time {
  186. iter.Item().Key = pos
  187. } else {
  188. tree.DeleteWithIterator(iter)
  189. iter = prev
  190. }
  191. origin.Value = time // cancels the insertion after applying the delta
  192. } else {
  193. _, iter = tree.Insert(rbtree.Item{Key: pos, Value: time})
  194. }
  195. } else {
  196. // rollback 1 position back, see "for true" deletion cycle ^
  197. iter = iter.Prev()
  198. previous = iter.Item()
  199. }
  200. // update the keys of all subsequent nodes
  201. delta := insLength - delLength
  202. if delta != 0 {
  203. for iter = iter.Next(); !iter.Limit(); iter = iter.Next() {
  204. // we do not need to re-balance the tree
  205. iter.Item().Key += delta
  206. }
  207. // have to adjust origin in case insLength == 0
  208. if origin.Key > pos {
  209. origin.Key += delta
  210. }
  211. }
  212. if insLength > 0 {
  213. if origin.Value != time {
  214. tree.Insert(rbtree.Item{Key: pos + insLength, Value: origin.Value})
  215. } else if pos == 0 {
  216. // recover the beginning
  217. tree.Insert(rbtree.Item{Key: pos, Value: time})
  218. }
  219. } else if (pos > origin.Key && previous.Value != origin.Value) || pos == origin.Key || pos == 0 {
  220. // continue the original interval
  221. tree.Insert(rbtree.Item{Key: pos, Value: origin.Value})
  222. }
  223. }
  224. // Merge combines several prepared File-s together.
  225. func (file *File) Merge(day int, others... *File) {
  226. myself := file.flatten()
  227. for _, other := range others {
  228. if other == nil {
  229. log.Panic("merging with a nil file")
  230. }
  231. lines := other.flatten()
  232. if len(myself) != len(lines) {
  233. log.Panicf("file corruption, lines number mismatch during merge %d != %d",
  234. len(myself), len(lines))
  235. }
  236. for i, l := range myself {
  237. ol := lines[i]
  238. if ol & TreeMergeMark == TreeMergeMark {
  239. continue
  240. }
  241. // the following should happen rarely:
  242. // l & TreeMergeMark != TreeMergeMark && l != ol
  243. if l & TreeMergeMark == TreeMergeMark || l > ol {
  244. // 1 - the line is merged in myself and exists in other
  245. // 2 - the same line introduced in different branches,
  246. // consider the oldest version as the ground truth
  247. //
  248. // in case with (2) we should decrease the "future" counter,
  249. // but that really poisons the analysis
  250. myself[i] = ol
  251. }
  252. }
  253. }
  254. for i, l := range myself {
  255. if l & TreeMergeMark == TreeMergeMark {
  256. // original merge conflict resolution
  257. myself[i] = day
  258. file.updateTime(day, day, 1)
  259. }
  260. }
  261. // now we need to reconstruct the tree from the discrete values
  262. tree := &rbtree.RBTree{}
  263. for i, v := range myself {
  264. if i == 0 || v != myself[i - 1] {
  265. tree.Insert(rbtree.Item{Key: i, Value: v})
  266. }
  267. }
  268. tree.Insert(rbtree.Item{Key: len(myself), Value: TreeEnd})
  269. file.tree = tree
  270. }
  271. // Dump formats the underlying line interval tree into a string.
  272. // Useful for error messages, panic()-s and debugging.
  273. func (file *File) Dump() string {
  274. buffer := ""
  275. for iter := file.tree.Min(); !iter.Limit(); iter = iter.Next() {
  276. node := iter.Item()
  277. buffer += fmt.Sprintf("%d %d\n", node.Key, node.Value)
  278. }
  279. return buffer
  280. }
  281. // Validate checks the underlying line interval tree integrity.
  282. // The checks are as follows:
  283. //
  284. // 1. The minimum key must be 0 because the first line index is always 0.
  285. //
  286. // 2. The last node must carry TreeEnd value. This is the maintained invariant
  287. // which marks the ending of the last line interval.
  288. //
  289. // 3. Node keys must monotonically increase and never duplicate.
  290. func (file *File) Validate() {
  291. if file.tree.Min().Item().Key != 0 {
  292. log.Panic("the tree must start with key 0")
  293. }
  294. if file.tree.Max().Item().Value != TreeEnd {
  295. log.Panicf("the last value in the tree must be %d", TreeEnd)
  296. }
  297. prevKey := -1
  298. for iter := file.tree.Min(); !iter.Limit(); iter = iter.Next() {
  299. node := iter.Item()
  300. if node.Key == prevKey {
  301. log.Panicf("duplicate tree key: %d", node.Key)
  302. }
  303. if node.Value == TreeMergeMark {
  304. log.Panicf("unmerged lines left: %d", node.Key)
  305. }
  306. prevKey = node.Key
  307. }
  308. }
  309. // flatten represents the file as a slice of lines, each line's value being the corresponding day.
  310. func (file *File) flatten() []int {
  311. lines := make([]int, 0, file.Len())
  312. val := -1
  313. for iter := file.tree.Min(); !iter.Limit(); iter = iter.Next() {
  314. for i := len(lines); i < iter.Item().Key; i++ {
  315. lines = append(lines, val)
  316. }
  317. val = iter.Item().Value
  318. }
  319. return lines
  320. }