#!/usr/bin/env python
def wer(r, h):
    """
    Return the Levenshtein (edit) distance between sequences ``r`` and ``h``.

    The result is the minimum number of substitutions, insertions and
    deletions needed to turn the reference ``r`` into the hypothesis ``h``.
    It is the raw edit count: it is NOT divided by the reference length, so
    callers wanting a word error *rate* must normalise it themselves.

    Args:
        r: Reference items (e.g. a list of words). Any iterable works;
            items are compared with ``==``.
        h: Hypothesis items, same kind as ``r``.

    Returns:
        The edit distance as a plain Python ``int``.

    Runs in O(len(r) * len(h)) time and O(len(h)) space. Plain Python
    integers are used for the costs, so there is no limit on input length.

    >>> wer("who is there".split(), "is there".split())
    1
    >>> wer("who is there".split(), "".split())
    3
    >>> wer("".split(), "who is there".split())
    3
    """
    # The hypothesis is walked once per reference item, so it must be
    # materialised; the reference itself is consumed in a single pass.
    hyp = list(h)

    # Row 0 of the DP table: turning an empty reference into the first j
    # hypothesis items costs j insertions.
    previous = list(range(len(hyp) + 1))

    for i, ref_item in enumerate(r, start=1):
        # Column 0: turning the first i reference items into an empty
        # hypothesis costs i deletions.
        current = [i]
        for j, hyp_item in enumerate(hyp, start=1):
            if ref_item == hyp_item:
                # Matching items extend the diagonal at no extra cost.
                cost = previous[j - 1]
            else:
                substitution = previous[j - 1] + 1
                insertion = current[j - 1] + 1
                deletion = previous[j] + 1
                cost = min(substitution, insertion, deletion)
            current.append(cost)
        # Only the previous row is ever read, so older rows are dropped.
        previous = current

    return previous[-1]
if __name__ == "__main__":
    # When executed as a script, run the doctests embedded in this module.
    import doctest as _doctest

    _doctest.testmod()
|