file.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348
  1. package burndown
  2. import (
  3. "fmt"
  4. "gopkg.in/src-d/go-git.v4/plumbing"
  5. "gopkg.in/src-d/hercules.v4/internal"
  6. "gopkg.in/src-d/hercules.v4/internal/rbtree"
  7. )
  8. // Updater is the function which is called back on File.Update().
  9. type Updater = func(currentTime, previousTime, delta int)
  10. // File encapsulates a balanced binary tree to store line intervals and
  11. // a cumulative mapping of values to the corresponding length counters. Users
  12. // are not supposed to create File-s directly; instead, they should call NewFile().
  13. // NewFileFromTree() is the special constructor which is useful in the tests.
  14. //
  15. // Len() returns the number of lines in File.
  16. //
  17. // Update() mutates File by introducing tree structural changes and updating the
  18. // length mapping.
  19. //
  20. // Dump() writes the tree to a string and Validate() checks the tree integrity.
  21. type File struct {
  22. // Git hash of the contents.
  23. Hash plumbing.Hash
  24. tree *rbtree.RBTree
  25. updaters []Updater
  26. }
  27. // TreeEnd denotes the value of the last leaf in the tree.
  28. const TreeEnd = -1
  29. // TreeMaxBinPower is the binary power value which corresponds to the maximum day which
  30. // can be stored in the tree.
  31. const TreeMaxBinPower = 14
  32. // TreeMergeMark is the special day which disables the status updates and is used in File.Merge().
  33. const TreeMergeMark = (1 << TreeMaxBinPower) - 1
  34. func (file *File) updateTime(currentTime, previousTime, delta int) {
  35. if currentTime & TreeMergeMark == TreeMergeMark {
  36. // merge mode
  37. return
  38. }
  39. if previousTime & TreeMergeMark == TreeMergeMark {
  40. previousTime = currentTime
  41. }
  42. for _, update := range file.updaters {
  43. update(currentTime, previousTime, delta)
  44. }
  45. }
  46. // NewFile initializes a new instance of File struct.
  47. //
  48. // time is the starting value of the first node;
  49. //
  50. // length is the starting length of the tree (the key of the second and the
  51. // last node);
  52. //
  53. // updaters are the attached interval length mappings.
  54. func NewFile(hash plumbing.Hash, time int, length int, updaters ...Updater) *File {
  55. file := &File{Hash: hash, tree: new(rbtree.RBTree), updaters: updaters}
  56. file.updateTime(time, time, length)
  57. if length > 0 {
  58. file.tree.Insert(rbtree.Item{Key: 0, Value: time})
  59. }
  60. file.tree.Insert(rbtree.Item{Key: length, Value: TreeEnd})
  61. return file
  62. }
  63. // NewFileFromTree is an alternative constructor for File which is used in tests.
  64. // The resulting tree is validated with Validate() to ensure the initial integrity.
  65. //
  66. // keys is a slice with the starting tree keys.
  67. //
  68. // vals is a slice with the starting tree values. Must match the size of keys.
  69. //
  70. // updaters are the attached interval length mappings.
  71. func NewFileFromTree(hash plumbing.Hash, keys []int, vals []int, updaters ...Updater) *File {
  72. file := &File{Hash: hash, tree: new(rbtree.RBTree), updaters: updaters}
  73. if len(keys) != len(vals) {
  74. panic("keys and vals must be of equal length")
  75. }
  76. for i := 0; i < len(keys); i++ {
  77. file.tree.Insert(rbtree.Item{Key: keys[i], Value: vals[i]})
  78. }
  79. file.Validate()
  80. return file
  81. }
  82. // Clone copies the file. It performs a deep copy of the tree;
  83. // depending on `clearStatuses` the original updaters are removed or not.
  84. // Any new `updaters` are appended.
  85. func (file *File) Clone(clearStatuses bool, updaters ...Updater) *File {
  86. clone := &File{Hash: file.Hash, tree: file.tree.Clone(), updaters: file.updaters}
  87. if clearStatuses {
  88. clone.updaters = []Updater{}
  89. }
  90. for _, updater := range updaters {
  91. clone.updaters = append(clone.updaters, updater)
  92. }
  93. return clone
  94. }
  95. // Len returns the File's size - that is, the maximum key in the tree of line
  96. // intervals.
  97. func (file *File) Len() int {
  98. return file.tree.Max().Item().Key
  99. }
  100. // Update modifies the underlying tree to adapt to the specified line changes.
  101. //
  102. // time is the time when the requested changes are made. Sets the values of the
  103. // inserted nodes.
  104. //
  105. // pos is the index of the line at which the changes are introduced.
  106. //
  107. // ins_length is the number of inserted lines after pos.
  108. //
  109. // del_length is the number of removed lines after pos. Deletions come before
  110. // the insertions.
  111. //
  112. // The code inside this function is probably the most important one throughout
  113. // the project. It is extensively covered with tests. If you find a bug, please
  114. // add the corresponding case in file_test.go.
  115. func (file *File) Update(time int, pos int, insLength int, delLength int) {
  116. if time < 0 {
  117. panic("time may not be negative")
  118. }
  119. if pos < 0 {
  120. panic("attempt to insert/delete at a negative position")
  121. }
  122. if insLength < 0 || delLength < 0 {
  123. panic("insLength and delLength must be non-negative")
  124. }
  125. if insLength|delLength == 0 {
  126. return
  127. }
  128. tree := file.tree
  129. if tree.Len() < 2 && tree.Min().Item().Key != 0 {
  130. panic("invalid tree state")
  131. }
  132. if pos > tree.Max().Item().Key {
  133. panic(fmt.Sprintf("attempt to insert after the end of the file: %d < %d",
  134. tree.Max().Item().Key, pos))
  135. }
  136. iter := tree.FindLE(pos)
  137. origin := *iter.Item()
  138. file.updateTime(time, time, insLength)
  139. if delLength == 0 {
  140. // simple case with insertions only
  141. if origin.Key < pos || (origin.Value == time && pos == 0) {
  142. iter = iter.Next()
  143. }
  144. for ; !iter.Limit(); iter = iter.Next() {
  145. iter.Item().Key += insLength
  146. }
  147. if origin.Value != time {
  148. tree.Insert(rbtree.Item{Key: pos, Value: time})
  149. if origin.Key < pos {
  150. tree.Insert(rbtree.Item{Key: pos + insLength, Value: origin.Value})
  151. }
  152. }
  153. return
  154. }
  155. // delete nodes
  156. for true {
  157. node := iter.Item()
  158. nextIter := iter.Next()
  159. if nextIter.Limit() {
  160. if pos+delLength > node.Key {
  161. panic("attempt to delete after the end of the file")
  162. }
  163. break
  164. }
  165. delta := internal.Min(nextIter.Item().Key, pos+delLength) - internal.Max(node.Key, pos)
  166. if delta <= 0 {
  167. break
  168. }
  169. file.updateTime(time, node.Value, -delta)
  170. if node.Key >= pos {
  171. origin = *node
  172. tree.DeleteWithIterator(iter)
  173. }
  174. iter = nextIter
  175. }
  176. // prepare for the keys update
  177. var previous *rbtree.Item
  178. if insLength > 0 && (origin.Value != time || origin.Key == pos) {
  179. // insert our new interval
  180. if iter.Item().Value == time {
  181. prev := iter.Prev()
  182. if prev.Item().Value != time {
  183. iter.Item().Key = pos
  184. } else {
  185. tree.DeleteWithIterator(iter)
  186. iter = prev
  187. }
  188. origin.Value = time // cancels the insertion after applying the delta
  189. } else {
  190. _, iter = tree.Insert(rbtree.Item{Key: pos, Value: time})
  191. }
  192. } else {
  193. // rollback 1 position back, see "for true" deletion cycle ^
  194. iter = iter.Prev()
  195. previous = iter.Item()
  196. }
  197. // update the keys of all subsequent nodes
  198. delta := insLength - delLength
  199. if delta != 0 {
  200. for iter = iter.Next(); !iter.Limit(); iter = iter.Next() {
  201. // we do not need to re-balance the tree
  202. iter.Item().Key += delta
  203. }
  204. // have to adjust origin in case insLength == 0
  205. if origin.Key > pos {
  206. origin.Key += delta
  207. }
  208. }
  209. if insLength > 0 {
  210. if origin.Value != time {
  211. tree.Insert(rbtree.Item{Key: pos + insLength, Value: origin.Value})
  212. } else if pos == 0 {
  213. // recover the beginning
  214. tree.Insert(rbtree.Item{Key: pos, Value: time})
  215. }
  216. } else if (pos > origin.Key && previous.Value != origin.Value) || pos == origin.Key || pos == 0 {
  217. // continue the original interval
  218. tree.Insert(rbtree.Item{Key: pos, Value: origin.Value})
  219. }
  220. }
  221. // Merge combines several prepared File-s together. Returns the value
  222. // indicating whether at least one File required merging.
  223. func (file *File) Merge(day int, others... *File) bool {
  224. dirty := false
  225. for _, other := range others {
  226. if other == nil {
  227. panic("merging File with nil")
  228. }
  229. if file.Hash != other.Hash {
  230. dirty = true
  231. break
  232. }
  233. }
  234. if !dirty {
  235. return false
  236. }
  237. myself := file.flatten()
  238. for _, other := range others {
  239. lines := other.flatten()
  240. if len(myself) != len(lines) {
  241. panic("file corruption, lines number mismatch during merge")
  242. }
  243. for i, l := range myself {
  244. ol := lines[i]
  245. if ol & TreeMergeMark == TreeMergeMark {
  246. continue
  247. }
  248. if l & TreeMergeMark == TreeMergeMark {
  249. myself[i] = ol
  250. } else if l != ol {
  251. // the same line introduced in different branches
  252. // consider the oldest version as the ground truth
  253. if l > ol {
  254. myself[i] = ol
  255. // subtract from the newer day l
  256. file.updateTime(ol, l, -1)
  257. } else {
  258. // subtract from the newer day ol
  259. file.updateTime(l, ol, -1)
  260. }
  261. }
  262. }
  263. }
  264. for i, l := range myself {
  265. if l & TreeMergeMark == TreeMergeMark {
  266. myself[i] = day
  267. file.updateTime(day, day, 1)
  268. }
  269. }
  270. // now we need to reconstruct the tree from the discrete values
  271. tree := &rbtree.RBTree{}
  272. for i, v := range myself {
  273. if i == 0 || v != myself[i - 1] {
  274. tree.Insert(rbtree.Item{Key: i, Value: v})
  275. }
  276. }
  277. tree.Insert(rbtree.Item{Key: len(myself), Value: TreeEnd})
  278. file.tree = tree
  279. return true
  280. }
  281. // Dump formats the underlying line interval tree into a string.
  282. // Useful for error messages, panic()-s and debugging.
  283. func (file *File) Dump() string {
  284. buffer := ""
  285. for iter := file.tree.Min(); !iter.Limit(); iter = iter.Next() {
  286. node := iter.Item()
  287. buffer += fmt.Sprintf("%d %d\n", node.Key, node.Value)
  288. }
  289. return buffer
  290. }
  291. // Validate checks the underlying line interval tree integrity.
  292. // The checks are as follows:
  293. //
  294. // 1. The minimum key must be 0 because the first line index is always 0.
  295. //
  296. // 2. The last node must carry TreeEnd value. This is the maintained invariant
  297. // which marks the ending of the last line interval.
  298. //
  299. // 3. Node keys must monotonically increase and never duplicate.
  300. func (file *File) Validate() {
  301. if file.tree.Min().Item().Key != 0 {
  302. panic("the tree must start with key 0")
  303. }
  304. if file.tree.Max().Item().Value != TreeEnd {
  305. panic(fmt.Sprintf("the last value in the tree must be %d", TreeEnd))
  306. }
  307. prevKey := -1
  308. for iter := file.tree.Min(); !iter.Limit(); iter = iter.Next() {
  309. node := iter.Item()
  310. if node.Key == prevKey {
  311. panic(fmt.Sprintf("duplicate tree key: %d", node.Key))
  312. }
  313. prevKey = node.Key
  314. }
  315. }
  316. // flatten represents the file as a slice of lines, each line's value being the corresponding day.
  317. func (file *File) flatten() []int {
  318. lines := make([]int, 0, file.Len())
  319. val := -1
  320. for iter := file.tree.Min(); !iter.Limit(); iter = iter.Next() {
  321. for i := len(lines); i < iter.Item().Key; i++ {
  322. lines = append(lines, val)
  323. }
  324. val = iter.Item().Value
  325. }
  326. return lines
  327. }