levenshtein.go 1.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
  1. // Copyright (c) 2015, Arbo von Monkiewitsch All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license.
  4. package levenshtein
  5. // Context is the object which allows to calculate the Levenshtein distance
  6. // with Distance() method. It is needed to ensure 0 memory allocations.
  7. type Context struct {
  8. intSlice []int
  9. }
  10. func (c *Context) getIntSlice(l int) []int {
  11. if cap(c.intSlice) < l {
  12. c.intSlice = make([]int, l)
  13. }
  14. return c.intSlice[:l]
  15. }
  16. // Distance calculates the Levenshtein distance between two strings which
  17. // is defined as the minimum number of edits needed to transform one string
  18. // into the other, with the allowable edit operations being insertion, deletion,
  19. // or substitution of a single character
  20. // http://en.wikipedia.org/wiki/Levenshtein_distance
  21. //
  22. // This implementation is optimized to use O(min(m,n)) space.
  23. // It is based on the optimized C version found here:
  24. // http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Levenshtein_distance#C
  25. func (c *Context) Distance(str1, str2 string) int {
  26. s1 := []rune(str1)
  27. s2 := []rune(str2)
  28. lenS1 := len(s1)
  29. lenS2 := len(s2)
  30. if lenS2 == 0 {
  31. return lenS1
  32. }
  33. column := c.getIntSlice(lenS1 + 1)
  34. // Column[0] will be initialised at the start of the first loop before it
  35. // is read, unless lenS2 is zero, which we deal with above
  36. for i := 1; i <= lenS1; i++ {
  37. column[i] = i
  38. }
  39. for x := 0; x < lenS2; x++ {
  40. s2Rune := s2[x]
  41. column[0] = x + 1
  42. lastdiag := x
  43. for y := 0; y < lenS1; y++ {
  44. olddiag := column[y+1]
  45. cost := 0
  46. if s1[y] != s2Rune {
  47. cost = 1
  48. }
  49. column[y+1] = min(
  50. column[y+1]+1,
  51. column[y]+1,
  52. lastdiag+cost,
  53. )
  54. lastdiag = olddiag
  55. }
  56. }
  57. return column[lenS1]
  58. }
  59. func min(a, b, c int) int {
  60. if a < b {
  61. if a < c {
  62. return a
  63. }
  64. } else {
  65. if b < c {
  66. return b
  67. }
  68. }
  69. return c
  70. }