renames.go 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612
  1. package plumbing
  2. import (
  3. "fmt"
  4. "path/filepath"
  5. "sort"
  6. "strings"
  7. "sync"
  8. "time"
  9. "unicode/utf8"
  10. "github.com/sergi/go-diff/diffmatchpatch"
  11. "gopkg.in/src-d/go-git.v4"
  12. "gopkg.in/src-d/go-git.v4/plumbing"
  13. "gopkg.in/src-d/go-git.v4/plumbing/object"
  14. "gopkg.in/src-d/go-git.v4/utils/merkletrie"
  15. "gopkg.in/src-d/hercules.v10/internal"
  16. "gopkg.in/src-d/hercules.v10/internal/core"
  17. "gopkg.in/src-d/hercules.v10/internal/levenshtein"
  18. )
  19. // RenameAnalysis improves TreeDiff's results by searching for changed blobs under different
  20. // paths which are likely to be the result of a rename with subsequent edits.
  21. // RenameAnalysis is a PipelineItem.
  22. type RenameAnalysis struct {
  23. core.NoopMerger
  24. // SimilarityThreshold adjusts the heuristic to determine file renames.
  25. // It has the same units as cgit's -X rename-threshold or -M. Better to
  26. // set it to the default value of 80 (80%).
  27. SimilarityThreshold int
  28. // Timeout is the maximum time allowed to spend computing renames in a single commit.
  29. Timeout time.Duration
  30. repository *git.Repository
  31. l core.Logger
  32. }
  33. const (
  34. // RenameAnalysisDefaultThreshold specifies the default percentage of common lines in a pair
  35. // of files to consider them linked. The exact code of the decision is sizesAreClose().
  36. // CGit's default is 50%. Ours is 80% because 50% can be too computationally expensive.
  37. RenameAnalysisDefaultThreshold = 80
  38. // RenameAnalysisDefaultTimeout is the default value of RenameAnalysis.Timeout (in milliseconds).
  39. RenameAnalysisDefaultTimeout = 60000
  40. // ConfigRenameAnalysisSimilarityThreshold is the name of the configuration option
  41. // (RenameAnalysis.Configure()) which sets the similarity threshold.
  42. ConfigRenameAnalysisSimilarityThreshold = "RenameAnalysis.SimilarityThreshold"
  43. // ConfigRenameAnalysisTimeout is the name of the configuration option
  44. // (RenameAnalysis.Configure()) which sets the maximum time allowed to spend
  45. // computing renames in a single commit.
  46. ConfigRenameAnalysisTimeout = "RenameAnalysis.Timeout"
  47. // RenameAnalysisMinimumSize is the minimum size of a blob to be considered.
  48. RenameAnalysisMinimumSize = 32
  49. // RenameAnalysisMaxCandidates is the maximum number of rename candidates to consider per file.
  50. RenameAnalysisMaxCandidates = 50
  51. // RenameAnalysisSetSizeLimit is the maximum number of added + removed files for
  52. // RenameAnalysisMaxCandidates to be active; the bigger numbers set it to 1.
  53. RenameAnalysisSetSizeLimit = 1000
  54. // RenameAnalysisByteDiffSizeThreshold is the maximum size of each of the compared parts
  55. // to be diff-ed on byte level.
  56. RenameAnalysisByteDiffSizeThreshold = 100000
  57. )
  58. // Name of this PipelineItem. Uniquely identifies the type, used for mapping keys, etc.
  59. func (ra *RenameAnalysis) Name() string {
  60. return "RenameAnalysis"
  61. }
  62. // Provides returns the list of names of entities which are produced by this PipelineItem.
  63. // Each produced entity will be inserted into `deps` of dependent Consume()-s according
  64. // to this list. Also used by core.Registry to build the global map of providers.
  65. func (ra *RenameAnalysis) Provides() []string {
  66. return []string{DependencyTreeChanges}
  67. }
  68. // Requires returns the list of names of entities which are needed by this PipelineItem.
  69. // Each requested entity will be inserted into `deps` of Consume(). In turn, those
  70. // entities are Provides() upstream.
  71. func (ra *RenameAnalysis) Requires() []string {
  72. return []string{DependencyBlobCache, DependencyTreeChanges}
  73. }
  74. // ListConfigurationOptions returns the list of changeable public properties of this PipelineItem.
  75. func (ra *RenameAnalysis) ListConfigurationOptions() []core.ConfigurationOption {
  76. options := [...]core.ConfigurationOption{{
  77. Name: ConfigRenameAnalysisSimilarityThreshold,
  78. Description: "The threshold on the similarity index used to detect renames.",
  79. Flag: "M",
  80. Type: core.IntConfigurationOption,
  81. Default: RenameAnalysisDefaultThreshold}, {
  82. Name: ConfigRenameAnalysisTimeout,
  83. Description: "The maximum time (milliseconds) allowed to spend computing " +
  84. "renames in a single commit. 0 sets the default.",
  85. Flag: "renames-timeout",
  86. Type: core.IntConfigurationOption,
  87. Default: RenameAnalysisDefaultTimeout},
  88. }
  89. return options[:]
  90. }
  91. // Configure sets the properties previously published by ListConfigurationOptions().
  92. func (ra *RenameAnalysis) Configure(facts map[string]interface{}) error {
  93. if l, exists := facts[core.ConfigLogger].(core.Logger); exists {
  94. ra.l = l
  95. }
  96. if val, exists := facts[ConfigRenameAnalysisSimilarityThreshold].(int); exists {
  97. ra.SimilarityThreshold = val
  98. }
  99. if val, exists := facts[ConfigRenameAnalysisTimeout].(int); exists {
  100. if val < 0 {
  101. return fmt.Errorf("negative renames detection timeout is not allowed: %d", val)
  102. }
  103. ra.Timeout = time.Duration(val) * time.Millisecond
  104. }
  105. return nil
  106. }
  107. // Initialize resets the temporary caches and prepares this PipelineItem for a series of Consume()
  108. // calls. The repository which is going to be analysed is supplied as an argument.
  109. func (ra *RenameAnalysis) Initialize(repository *git.Repository) error {
  110. ra.l = core.NewLogger()
  111. if ra.SimilarityThreshold < 0 || ra.SimilarityThreshold > 100 {
  112. ra.l.Warnf("adjusted the similarity threshold to %d\n",
  113. RenameAnalysisDefaultThreshold)
  114. ra.SimilarityThreshold = RenameAnalysisDefaultThreshold
  115. }
  116. if ra.Timeout == 0 {
  117. ra.Timeout = time.Duration(RenameAnalysisDefaultTimeout) * time.Millisecond
  118. }
  119. ra.repository = repository
  120. return nil
  121. }
  122. // Consume runs this PipelineItem on the next commit data.
  123. // `deps` contain all the results from upstream PipelineItem-s as requested by Requires().
  124. // Additionally, DependencyCommit is always present there and represents the analysed *object.Commit.
  125. // This function returns the mapping with analysis results. The keys must be the same as
  126. // in Provides(). If there was an error, nil is returned.
  127. func (ra *RenameAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
  128. beginTime := time.Now()
  129. changes := deps[DependencyTreeChanges].(object.Changes)
  130. cache := deps[DependencyBlobCache].(map[plumbing.Hash]*CachedBlob)
  131. reducedChanges := make(object.Changes, 0, changes.Len())
  132. // Stage 1 - find renames by matching the hashes
  133. // n log(n)
  134. // We sort additions and deletions by hash and then do the single scan along
  135. // both slices.
  136. deleted := make(sortableChanges, 0, changes.Len())
  137. added := make(sortableChanges, 0, changes.Len())
  138. for _, change := range changes {
  139. action, err := change.Action()
  140. if err != nil {
  141. return nil, err
  142. }
  143. switch action {
  144. case merkletrie.Insert:
  145. added = append(added, sortableChange{change, change.To.TreeEntry.Hash})
  146. case merkletrie.Delete:
  147. deleted = append(deleted, sortableChange{change, change.From.TreeEntry.Hash})
  148. case merkletrie.Modify:
  149. reducedChanges = append(reducedChanges, change)
  150. }
  151. }
  152. sort.Sort(deleted)
  153. sort.Sort(added)
  154. stillDeleted := make(object.Changes, 0, deleted.Len())
  155. stillAdded := make(object.Changes, 0, added.Len())
  156. {
  157. a := 0
  158. d := 0
  159. for a < added.Len() && d < deleted.Len() {
  160. if added[a].hash == deleted[d].hash {
  161. reducedChanges = append(
  162. reducedChanges,
  163. &object.Change{From: deleted[d].change.From, To: added[a].change.To})
  164. a++
  165. d++
  166. } else if added[a].Less(&deleted[d]) {
  167. stillAdded = append(stillAdded, added[a].change)
  168. a++
  169. } else {
  170. stillDeleted = append(stillDeleted, deleted[d].change)
  171. d++
  172. }
  173. }
  174. for ; a < added.Len(); a++ {
  175. stillAdded = append(stillAdded, added[a].change)
  176. }
  177. for ; d < deleted.Len(); d++ {
  178. stillDeleted = append(stillDeleted, deleted[d].change)
  179. }
  180. }
  181. // Stage 2 - apply the similarity threshold
  182. // n^2 but actually linear
  183. // We sort the blobs by size and do the single linear scan.
  184. maxCandidates := RenameAnalysisMaxCandidates
  185. if len(stillAdded)+len(stillDeleted) > RenameAnalysisSetSizeLimit {
  186. maxCandidates = 1
  187. }
  188. addedBlobs := make(sortableBlobs, 0, stillAdded.Len())
  189. deletedBlobs := make(sortableBlobs, 0, stillDeleted.Len())
  190. var smallChanges []*object.Change
  191. for _, change := range stillAdded {
  192. blob := cache[change.To.TreeEntry.Hash]
  193. if blob.Size < RenameAnalysisMinimumSize {
  194. smallChanges = append(smallChanges, change)
  195. } else {
  196. addedBlobs = append(
  197. addedBlobs, sortableBlob{change: change, size: blob.Size})
  198. }
  199. }
  200. for _, change := range stillDeleted {
  201. blob := cache[change.From.TreeEntry.Hash]
  202. if blob.Size < RenameAnalysisMinimumSize {
  203. smallChanges = append(smallChanges, change)
  204. } else {
  205. deletedBlobs = append(
  206. deletedBlobs, sortableBlob{change: change, size: blob.Size})
  207. }
  208. }
  209. sort.Sort(addedBlobs)
  210. sort.Sort(deletedBlobs)
  211. finished := make(chan bool, 2)
  212. finishedA := make(chan bool, 1)
  213. finishedB := make(chan bool, 1)
  214. errs := make(chan error)
  215. matchesA := make(object.Changes, 0, changes.Len())
  216. matchesB := make(object.Changes, 0, changes.Len())
  217. addedBlobsA := addedBlobs
  218. addedBlobsB := make(sortableBlobs, len(addedBlobs))
  219. copy(addedBlobsB, addedBlobs)
  220. deletedBlobsA := deletedBlobs
  221. deletedBlobsB := make(sortableBlobs, len(deletedBlobs))
  222. copy(deletedBlobsB, deletedBlobs)
  223. wg := sync.WaitGroup{}
  224. matchA := func() {
  225. defer func() {
  226. finished <- true
  227. wg.Done()
  228. }()
  229. aStart := 0
  230. // we will try to find a matching added blob for each deleted blob
  231. for d := 0; d < deletedBlobsA.Len() && time.Now().Sub(beginTime) < ra.Timeout; d++ {
  232. myBlob := cache[deletedBlobsA[d].change.From.TreeEntry.Hash]
  233. mySize := deletedBlobsA[d].size
  234. myName := filepath.Base(deletedBlobsA[d].change.From.Name)
  235. var a int
  236. for a = aStart; a < addedBlobsA.Len() && !ra.sizesAreClose(mySize, addedBlobsA[a].size); a++ {
  237. }
  238. aStart = a
  239. foundMatch := false
  240. // get the list of possible candidates and sort by file name similarity
  241. var candidates []int
  242. for a = aStart; a < addedBlobsA.Len() && ra.sizesAreClose(mySize, addedBlobsA[a].size); a++ {
  243. candidates = append(candidates, a)
  244. }
  245. sortRenameCandidates(candidates, myName, func(a int) string {
  246. return addedBlobsA[a].change.To.Name
  247. })
  248. var ci int
  249. for ci, a = range candidates {
  250. select {
  251. case <-finished:
  252. return
  253. default:
  254. break
  255. }
  256. if ci > maxCandidates {
  257. break
  258. }
  259. blobsAreClose, err := ra.blobsAreClose(
  260. myBlob, cache[addedBlobsA[a].change.To.TreeEntry.Hash])
  261. if err != nil {
  262. errs <- err
  263. return
  264. }
  265. if blobsAreClose {
  266. foundMatch = true
  267. matchesA = append(
  268. matchesA,
  269. &object.Change{
  270. From: deletedBlobsA[d].change.From,
  271. To: addedBlobsA[a].change.To})
  272. break
  273. }
  274. }
  275. if foundMatch {
  276. deletedBlobsA = append(deletedBlobsA[:d], deletedBlobsA[d+1:]...)
  277. d--
  278. addedBlobsA = append(addedBlobsA[:a], addedBlobsA[a+1:]...)
  279. }
  280. }
  281. finishedA <- true
  282. }
  283. matchB := func() {
  284. defer func() {
  285. finished <- true
  286. wg.Done()
  287. }()
  288. dStart := 0
  289. for a := 0; a < addedBlobsB.Len() && time.Now().Sub(beginTime) < ra.Timeout; a++ {
  290. myBlob := cache[addedBlobsB[a].change.To.TreeEntry.Hash]
  291. mySize := addedBlobsB[a].size
  292. myName := filepath.Base(addedBlobsB[a].change.To.Name)
  293. var d int
  294. for d = dStart; d < deletedBlobsB.Len() && !ra.sizesAreClose(mySize, deletedBlobsB[d].size); d++ {
  295. }
  296. dStart = d
  297. foundMatch := false
  298. // get the list of possible candidates and sort by file name similarity
  299. var candidates []int
  300. for d = dStart; d < deletedBlobsB.Len() && ra.sizesAreClose(mySize, deletedBlobsB[d].size); d++ {
  301. candidates = append(candidates, d)
  302. }
  303. sortRenameCandidates(candidates, myName, func(d int) string {
  304. return deletedBlobsB[d].change.From.Name
  305. })
  306. var ci int
  307. for ci, d = range candidates {
  308. select {
  309. case <-finished:
  310. return
  311. default:
  312. break
  313. }
  314. if ci > maxCandidates {
  315. break
  316. }
  317. blobsAreClose, err := ra.blobsAreClose(
  318. myBlob, cache[deletedBlobsB[d].change.From.TreeEntry.Hash])
  319. if err != nil {
  320. errs <- err
  321. return
  322. }
  323. if blobsAreClose {
  324. foundMatch = true
  325. matchesB = append(
  326. matchesB,
  327. &object.Change{
  328. From: deletedBlobsB[d].change.From,
  329. To: addedBlobsB[a].change.To})
  330. break
  331. }
  332. }
  333. if foundMatch {
  334. addedBlobsB = append(addedBlobsB[:a], addedBlobsB[a+1:]...)
  335. a--
  336. deletedBlobsB = append(deletedBlobsB[:d], deletedBlobsB[d+1:]...)
  337. }
  338. }
  339. finishedB <- true
  340. }
  341. // run two functions in parallel, and take the result from the one which finished earlier
  342. wg.Add(2)
  343. go matchA()
  344. go matchB()
  345. wg.Wait()
  346. var matches object.Changes
  347. select {
  348. case err := <-errs:
  349. return nil, err
  350. case <-finishedA:
  351. addedBlobs = addedBlobsA
  352. deletedBlobs = deletedBlobsA
  353. matches = matchesA
  354. case <-finishedB:
  355. addedBlobs = addedBlobsB
  356. deletedBlobs = deletedBlobsB
  357. matches = matchesB
  358. default:
  359. panic("Impossible happened: two functions returned without an error " +
  360. "but no results from both")
  361. }
  362. // Stage 3 - we give up, everything left are independent additions and deletions
  363. for _, change := range matches {
  364. reducedChanges = append(reducedChanges, change)
  365. }
  366. for _, blob := range addedBlobs {
  367. reducedChanges = append(reducedChanges, blob.change)
  368. }
  369. for _, blob := range deletedBlobs {
  370. reducedChanges = append(reducedChanges, blob.change)
  371. }
  372. for _, change := range smallChanges {
  373. reducedChanges = append(reducedChanges, change)
  374. }
  375. return map[string]interface{}{DependencyTreeChanges: reducedChanges}, nil
  376. }
  377. // Fork clones this PipelineItem.
  378. func (ra *RenameAnalysis) Fork(n int) []core.PipelineItem {
  379. return core.ForkSamePipelineItem(ra, n)
  380. }
  381. func (ra *RenameAnalysis) sizesAreClose(size1 int64, size2 int64) bool {
  382. size := internal.Max64(1, internal.Max64(size1, size2))
  383. return (internal.Abs64(size1-size2)*10000)/size <= int64(100-ra.SimilarityThreshold)*100
  384. }
  385. func (ra *RenameAnalysis) blobsAreClose(blob1 *CachedBlob, blob2 *CachedBlob) (bool, error) {
  386. cleanReturn := false
  387. defer func() {
  388. if !cleanReturn {
  389. ra.l.Warnf("\nunclean return detected for blobs '%s' and '%s'\n",
  390. blob1.Hash.String(), blob2.Hash.String())
  391. }
  392. }()
  393. _, err1 := blob1.CountLines()
  394. _, err2 := blob2.CountLines()
  395. if err1 == ErrorBinary || err2 == ErrorBinary {
  396. // binary mode
  397. bsdifflen := DiffBytes(blob1.Data, blob2.Data)
  398. delta := int((int64(bsdifflen) * 100) / internal.Max64(
  399. internal.Min64(blob1.Size, blob2.Size), 1))
  400. cleanReturn = true
  401. return 100-delta >= ra.SimilarityThreshold, nil
  402. }
  403. src, dst := string(blob1.Data), string(blob2.Data)
  404. maxSize := internal.Max(1, internal.Max(utf8.RuneCountInString(src), utf8.RuneCountInString(dst)))
  405. // compute the line-by-line diff, then the char-level diffs of the del-ins blocks
  406. // yes, this algorithm is greedy and not exact
  407. dmp := diffmatchpatch.New()
  408. dmp.DiffTimeout = time.Hour
  409. srcLineRunes, dstLineRunes, _ := dmp.DiffLinesToRunes(src, dst)
  410. // the third returned value, []string, is the mapping from runes to lines
  411. // we cannot use it because it is approximate and has string collisions
  412. // that is, the mapping is wrong for huge files
  413. diffs := dmp.DiffMainRunes(srcLineRunes, dstLineRunes, false)
  414. srcPositions := calcLinePositions(src)
  415. dstPositions := calcLinePositions(dst)
  416. var common, posSrc, prevPosSrc, posDst int
  417. possibleDelInsBlock := false
  418. for _, edit := range diffs {
  419. switch edit.Type {
  420. case diffmatchpatch.DiffDelete:
  421. possibleDelInsBlock = true
  422. prevPosSrc = posSrc
  423. posSrc += utf8.RuneCountInString(edit.Text)
  424. case diffmatchpatch.DiffInsert:
  425. nextPosDst := posDst + utf8.RuneCountInString(edit.Text)
  426. if possibleDelInsBlock {
  427. possibleDelInsBlock = false
  428. if internal.Max(srcPositions[posSrc]-srcPositions[prevPosSrc],
  429. dstPositions[nextPosDst]-dstPositions[posDst]) < RenameAnalysisByteDiffSizeThreshold {
  430. localDmp := diffmatchpatch.New()
  431. localDmp.DiffTimeout = time.Hour
  432. localSrc := src[srcPositions[prevPosSrc]:srcPositions[posSrc]]
  433. localDst := dst[dstPositions[posDst]:dstPositions[nextPosDst]]
  434. localDiffs := localDmp.DiffMainRunes(
  435. strToLiteralRunes(localSrc), strToLiteralRunes(localDst), false)
  436. for _, localEdit := range localDiffs {
  437. if localEdit.Type == diffmatchpatch.DiffEqual {
  438. common += utf8.RuneCountInString(localEdit.Text)
  439. }
  440. }
  441. }
  442. }
  443. posDst = nextPosDst
  444. case diffmatchpatch.DiffEqual:
  445. possibleDelInsBlock = false
  446. step := utf8.RuneCountInString(edit.Text)
  447. // for i := range edit.Text does *not* work
  448. // idk why, but `i` appears to be bigger than the number of runes
  449. for i := 0; i < step; i++ {
  450. common += srcPositions[posSrc+i+1] - srcPositions[posSrc+i]
  451. }
  452. posSrc += step
  453. posDst += step
  454. }
  455. if possibleDelInsBlock {
  456. continue
  457. }
  458. // supposing that the rest of the lines are the same (they are not - too optimistic),
  459. // estimate the maximum similarity and exit the loop if it lower than our threshold
  460. var srcPendingSize, dstPendingSize int
  461. srcPendingSize = len(src) - srcPositions[posSrc]
  462. dstPendingSize = len(dst) - dstPositions[posDst]
  463. maxCommon := common + internal.Min(srcPendingSize, dstPendingSize)
  464. similarity := (maxCommon * 100) / maxSize
  465. if similarity < ra.SimilarityThreshold {
  466. cleanReturn = true
  467. return false, nil
  468. }
  469. similarity = (common * 100) / maxSize
  470. if similarity >= ra.SimilarityThreshold {
  471. cleanReturn = true
  472. return true, nil
  473. }
  474. }
  475. // the very last "overly optimistic" estimate was actually precise, so since we are still here
  476. // the blobs are similar
  477. cleanReturn = true
  478. return true, nil
  479. }
  480. func calcLinePositions(text string) []int {
  481. if text == "" {
  482. return []int{0}
  483. }
  484. lines := strings.Split(text, "\n")
  485. positions := make([]int, len(lines)+1)
  486. accum := 0
  487. for i, l := range lines {
  488. positions[i] = accum
  489. accum += len(l) + 1 // +1 for \n
  490. }
  491. if len(lines) > 0 && lines[len(lines)-1] != "\n" {
  492. accum--
  493. }
  494. positions[len(lines)] = accum
  495. return positions
  496. }
  497. func strToLiteralRunes(s string) []rune {
  498. lrunes := make([]rune, len(s))
  499. for i, b := range []byte(s) {
  500. lrunes[i] = rune(b)
  501. }
  502. return lrunes
  503. }
  504. type sortableChange struct {
  505. change *object.Change
  506. hash plumbing.Hash
  507. }
  508. type sortableChanges []sortableChange
  509. func (change *sortableChange) Less(other *sortableChange) bool {
  510. for x := 0; x < 20; x++ {
  511. if change.hash[x] < other.hash[x] {
  512. return true
  513. }
  514. }
  515. return false
  516. }
  517. func (slice sortableChanges) Len() int {
  518. return len(slice)
  519. }
  520. func (slice sortableChanges) Less(i, j int) bool {
  521. return slice[i].Less(&slice[j])
  522. }
  523. func (slice sortableChanges) Swap(i, j int) {
  524. slice[i], slice[j] = slice[j], slice[i]
  525. }
  526. type sortableBlob struct {
  527. change *object.Change
  528. size int64
  529. }
  530. type sortableBlobs []sortableBlob
  531. func (change *sortableBlob) Less(other *sortableBlob) bool {
  532. return change.size < other.size
  533. }
  534. func (slice sortableBlobs) Len() int {
  535. return len(slice)
  536. }
  537. func (slice sortableBlobs) Less(i, j int) bool {
  538. return slice[i].Less(&slice[j])
  539. }
  540. func (slice sortableBlobs) Swap(i, j int) {
  541. slice[i], slice[j] = slice[j], slice[i]
  542. }
  543. type candidateDistance struct {
  544. Candidate int
  545. Distance int
  546. }
  547. func sortRenameCandidates(candidates []int, origin string, nameGetter func(int) string) {
  548. distances := make([]candidateDistance, len(candidates))
  549. ctx := levenshtein.Context{}
  550. for i, x := range candidates {
  551. name := filepath.Base(nameGetter(x))
  552. distances[i] = candidateDistance{x, ctx.Distance(origin, name)}
  553. }
  554. sort.Slice(distances, func(i, j int) bool {
  555. return distances[i].Distance < distances[j].Distance
  556. })
  557. for i, cd := range distances {
  558. candidates[i] = cd.Candidate
  559. }
  560. }
  561. func init() {
  562. core.Registry.Register(&RenameAnalysis{})
  563. }