file.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360
  1. package burndown
  2. import (
  3. "fmt"
  4. "gopkg.in/src-d/go-git.v4/plumbing"
  5. "gopkg.in/src-d/hercules.v4/internal"
  6. "gopkg.in/src-d/hercules.v4/internal/rbtree"
  7. )
  8. // Status is the something we would like to keep track of in File.Update().
  9. type Status struct {
  10. data interface{}
  11. update func(interface{}, int, int, int)
  12. }
  13. // File encapsulates a balanced binary tree to store line intervals and
  14. // a cumulative mapping of values to the corresponding length counters. Users
  15. // are not supposed to create File-s directly; instead, they should call NewFile().
  16. // NewFileFromTree() is the special constructor which is useful in the tests.
  17. //
  18. // Len() returns the number of lines in File.
  19. //
  20. // Update() mutates File by introducing tree structural changes and updating the
  21. // length mapping.
  22. //
  23. // Dump() writes the tree to a string and Validate() checks the tree integrity.
  24. type File struct {
  25. // Git hash of the contents.
  26. Hash plumbing.Hash
  27. tree *rbtree.RBTree
  28. statuses []Status
  29. }
  30. // NewStatus initializes a new instance of Status struct. It is needed to set the only two
  31. // private fields which are not supposed to be replaced during the whole lifetime.
  32. func NewStatus(data interface{}, update func(interface{}, int, int, int)) Status {
  33. return Status{data: data, update: update}
  34. }
  35. // TreeEnd denotes the value of the last leaf in the tree.
  36. const TreeEnd = -1
  37. // TreeMaxBinPower is the binary power value which corresponds to the maximum day which
  38. // can be stored in the tree.
  39. const TreeMaxBinPower = 14
  40. // TreeMergeMark is the special day which disables the status updates and is used in File.Merge().
  41. const TreeMergeMark = (1 << TreeMaxBinPower) - 1
  42. func (file *File) updateTime(currentTime int, previousTime int, delta int) {
  43. if currentTime & TreeMergeMark == TreeMergeMark {
  44. // merge mode
  45. return
  46. }
  47. for _, status := range file.statuses {
  48. status.update(status.data, currentTime, previousTime, delta)
  49. }
  50. }
  51. // NewFile initializes a new instance of File struct.
  52. //
  53. // time is the starting value of the first node;
  54. //
  55. // length is the starting length of the tree (the key of the second and the
  56. // last node);
  57. //
  58. // statuses are the attached interval length mappings.
  59. func NewFile(hash plumbing.Hash, time int, length int, statuses ...Status) *File {
  60. file := &File{Hash: hash, tree: new(rbtree.RBTree), statuses: statuses}
  61. if length > 0 {
  62. file.updateTime(time, time, length)
  63. file.tree.Insert(rbtree.Item{Key: 0, Value: time})
  64. }
  65. file.tree.Insert(rbtree.Item{Key: length, Value: TreeEnd})
  66. return file
  67. }
  68. // NewFileFromTree is an alternative constructor for File which is used in tests.
  69. // The resulting tree is validated with Validate() to ensure the initial integrity.
  70. //
  71. // keys is a slice with the starting tree keys.
  72. //
  73. // vals is a slice with the starting tree values. Must match the size of keys.
  74. //
  75. // statuses are the attached interval length mappings.
  76. func NewFileFromTree(hash plumbing.Hash, keys []int, vals []int, statuses ...Status) *File {
  77. file := &File{Hash: hash, tree: new(rbtree.RBTree), statuses: statuses}
  78. if len(keys) != len(vals) {
  79. panic("keys and vals must be of equal length")
  80. }
  81. for i := 0; i < len(keys); i++ {
  82. file.tree.Insert(rbtree.Item{Key: keys[i], Value: vals[i]})
  83. }
  84. file.Validate()
  85. return file
  86. }
  87. // Clone copies the file. It performs a deep copy of the tree;
  88. // depending on `clearStatuses` the original statuses are removed or not.
  89. // Any new `statuses` are appended.
  90. func (file *File) Clone(clearStatuses bool, statuses ...Status) *File {
  91. clone := &File{Hash: file.Hash, tree: file.tree.Clone(), statuses: file.statuses}
  92. if clearStatuses {
  93. clone.statuses = []Status{}
  94. }
  95. for _, status := range statuses {
  96. clone.statuses = append(clone.statuses, status)
  97. }
  98. return clone
  99. }
  100. // Len returns the File's size - that is, the maximum key in the tree of line
  101. // intervals.
  102. func (file *File) Len() int {
  103. return file.tree.Max().Item().Key
  104. }
  105. // Update modifies the underlying tree to adapt to the specified line changes.
  106. //
  107. // time is the time when the requested changes are made. Sets the values of the
  108. // inserted nodes.
  109. //
  110. // pos is the index of the line at which the changes are introduced.
  111. //
  112. // ins_length is the number of inserted lines after pos.
  113. //
  114. // del_length is the number of removed lines after pos. Deletions come before
  115. // the insertions.
  116. //
  117. // The code inside this function is probably the most important one throughout
  118. // the project. It is extensively covered with tests. If you find a bug, please
  119. // add the corresponding case in file_test.go.
  120. func (file *File) Update(time int, pos int, insLength int, delLength int) {
  121. if time < 0 {
  122. panic("time may not be negative")
  123. }
  124. if pos < 0 {
  125. panic("attempt to insert/delete at a negative position")
  126. }
  127. if insLength < 0 || delLength < 0 {
  128. panic("insLength and delLength must be non-negative")
  129. }
  130. if insLength|delLength == 0 {
  131. return
  132. }
  133. tree := file.tree
  134. if tree.Len() < 2 && tree.Min().Item().Key != 0 {
  135. panic("invalid tree state")
  136. }
  137. if pos > tree.Max().Item().Key {
  138. panic(fmt.Sprintf("attempt to insert after the end of the file: %d < %d",
  139. tree.Max().Item().Key, pos))
  140. }
  141. iter := tree.FindLE(pos)
  142. origin := *iter.Item()
  143. file.updateTime(time, time, insLength)
  144. if delLength == 0 {
  145. // simple case with insertions only
  146. if origin.Key < pos || (origin.Value == time && pos == 0) {
  147. iter = iter.Next()
  148. }
  149. for ; !iter.Limit(); iter = iter.Next() {
  150. iter.Item().Key += insLength
  151. }
  152. if origin.Value != time {
  153. tree.Insert(rbtree.Item{Key: pos, Value: time})
  154. if origin.Key < pos {
  155. tree.Insert(rbtree.Item{Key: pos + insLength, Value: origin.Value})
  156. }
  157. }
  158. return
  159. }
  160. // delete nodes
  161. for true {
  162. node := iter.Item()
  163. nextIter := iter.Next()
  164. if nextIter.Limit() {
  165. if pos+delLength > node.Key {
  166. panic("attempt to delete after the end of the file")
  167. }
  168. break
  169. }
  170. delta := internal.Min(nextIter.Item().Key, pos+delLength) - internal.Max(node.Key, pos)
  171. if delta <= 0 {
  172. break
  173. }
  174. file.updateTime(time, node.Value, -delta)
  175. if node.Key >= pos {
  176. origin = *node
  177. tree.DeleteWithIterator(iter)
  178. }
  179. iter = nextIter
  180. }
  181. // prepare for the keys update
  182. var previous *rbtree.Item
  183. if insLength > 0 && (origin.Value != time || origin.Key == pos) {
  184. // insert our new interval
  185. if iter.Item().Value == time {
  186. prev := iter.Prev()
  187. if prev.Item().Value != time {
  188. iter.Item().Key = pos
  189. } else {
  190. tree.DeleteWithIterator(iter)
  191. iter = prev
  192. }
  193. origin.Value = time // cancels the insertion after applying the delta
  194. } else {
  195. _, iter = tree.Insert(rbtree.Item{Key: pos, Value: time})
  196. }
  197. } else {
  198. // rollback 1 position back, see "for true" deletion cycle ^
  199. iter = iter.Prev()
  200. previous = iter.Item()
  201. }
  202. // update the keys of all subsequent nodes
  203. delta := insLength - delLength
  204. if delta != 0 {
  205. for iter = iter.Next(); !iter.Limit(); iter = iter.Next() {
  206. // we do not need to re-balance the tree
  207. iter.Item().Key += delta
  208. }
  209. // have to adjust origin in case insLength == 0
  210. if origin.Key > pos {
  211. origin.Key += delta
  212. }
  213. }
  214. if insLength > 0 {
  215. if origin.Value != time {
  216. tree.Insert(rbtree.Item{Key: pos + insLength, Value: origin.Value})
  217. } else if pos == 0 {
  218. // recover the beginning
  219. tree.Insert(rbtree.Item{Key: pos, Value: time})
  220. }
  221. } else if (pos > origin.Key && previous.Value != origin.Value) || pos == origin.Key || pos == 0 {
  222. // continue the original interval
  223. tree.Insert(rbtree.Item{Key: pos, Value: origin.Value})
  224. }
  225. }
  226. // Merge combines several prepared File-s together. Returns the value
  227. // indicating whether at least one File required merging.
  228. func (file *File) Merge(day int, others... *File) bool {
  229. dirty := false
  230. for _, other := range others {
  231. if file.Hash != other.Hash {
  232. dirty = true
  233. break
  234. }
  235. }
  236. if !dirty {
  237. return false
  238. }
  239. myself := file.flatten()
  240. for _, other := range others {
  241. lines := other.flatten()
  242. if len(myself) != len(lines) {
  243. panic("file corruption, lines number mismatch during merge")
  244. }
  245. for i, l := range myself {
  246. ol := lines[i]
  247. if ol & TreeMergeMark == TreeMergeMark {
  248. continue
  249. }
  250. if l & TreeMergeMark == TreeMergeMark {
  251. myself[i] = ol
  252. } else if l != ol {
  253. // the same line introduced in different branches
  254. // consider the oldest version as the ground truth
  255. if l > ol {
  256. myself[i] = ol
  257. // subtract from the newer day l
  258. file.updateTime(ol, l, -1)
  259. } else {
  260. // subtract from the newer day ol
  261. file.updateTime(l, ol, -1)
  262. }
  263. }
  264. }
  265. }
  266. for i, l := range myself {
  267. if l & TreeMergeMark == TreeMergeMark {
  268. myself[i] = day
  269. file.updateTime(day, day, 1)
  270. }
  271. }
  272. // now we need to reconstruct the tree from the discrete values
  273. tree := &rbtree.RBTree{}
  274. for i, v := range myself {
  275. if i == 0 || v != myself[i - 1] {
  276. tree.Insert(rbtree.Item{Key: i, Value: v})
  277. }
  278. }
  279. tree.Insert(rbtree.Item{Key: len(myself), Value: TreeEnd})
  280. file.tree = tree
  281. return true
  282. }
  283. // Status returns the bound status object by the specified index.
  284. func (file *File) Status(index int) interface{} {
  285. if index < 0 || index >= len(file.statuses) {
  286. panic(fmt.Sprintf("status index %d is out of bounds [0, %d)",
  287. index, len(file.statuses)))
  288. }
  289. return file.statuses[index].data
  290. }
  291. // Dump formats the underlying line interval tree into a string.
  292. // Useful for error messages, panic()-s and debugging.
  293. func (file *File) Dump() string {
  294. buffer := ""
  295. for iter := file.tree.Min(); !iter.Limit(); iter = iter.Next() {
  296. node := iter.Item()
  297. buffer += fmt.Sprintf("%d %d\n", node.Key, node.Value)
  298. }
  299. return buffer
  300. }
  301. // Validate checks the underlying line interval tree integrity.
  302. // The checks are as follows:
  303. //
  304. // 1. The minimum key must be 0 because the first line index is always 0.
  305. //
  306. // 2. The last node must carry TreeEnd value. This is the maintained invariant
  307. // which marks the ending of the last line interval.
  308. //
  309. // 3. Node keys must monotonically increase and never duplicate.
  310. func (file *File) Validate() {
  311. if file.tree.Min().Item().Key != 0 {
  312. panic("the tree must start with key 0")
  313. }
  314. if file.tree.Max().Item().Value != TreeEnd {
  315. panic(fmt.Sprintf("the last value in the tree must be %d", TreeEnd))
  316. }
  317. prevKey := -1
  318. for iter := file.tree.Min(); !iter.Limit(); iter = iter.Next() {
  319. node := iter.Item()
  320. if node.Key == prevKey {
  321. panic(fmt.Sprintf("duplicate tree key: %d", node.Key))
  322. }
  323. prevKey = node.Key
  324. }
  325. }
  326. // flatten represents the file as a slice of lines, each line's value being the corresponding day.
  327. func (file *File) flatten() []int {
  328. lines := make([]int, 0, file.Len())
  329. val := -1
  330. for iter := file.tree.Min(); !iter.Limit(); iter = iter.Next() {
  331. for i := len(lines); i < iter.Item().Key; i++ {
  332. lines = append(lines, val)
  333. }
  334. val = iter.Item().Value
  335. }
  336. return lines
  337. }