瀏覽代碼

WER calculation

Martin Thoma 11 年之前
父節點
當前提交
2d20945d12

+ 35 - 0
source-code/Pseudocode/WER-calculation/Makefile

@@ -0,0 +1,35 @@
+# Build helpers for the WER-calculation pseudocode example.
+#
+# SOURCE is the basename (without extension) of the LaTeX document. All
+# knobs below can be overridden on the command line, e.g.
+# `make png WIDTH=800`.
+SOURCE  := WER-calculation
+# Value handed to `convert -delay` by the gif target.
+DELAY   := 80
+# Value handed to `convert -density` by the gif target.
+DENSITY := 300
+# Width handed to rsvg-convert / inkscape via -w (svg and png targets).
+WIDTH   := 500
+# Optional extra paths removed by `clean`; empty unless the caller sets it.
+TARGET  ?=
+
+# None of these targets names a file that its recipe creates, so they must
+# always run and must not be shadowed by a file of the same name.
+.PHONY: make clean gif png transparentGif svg
+
+# Default goal: compile $(SOURCE).tex to $(SOURCE).pdf, then remove the
+# LaTeX by-products. $(MAKE) (not a literal `make`) keeps -n/-j working.
+make:
+	pdflatex $(SOURCE).tex -output-format=pdf
+	$(MAKE) clean
+
+# Remove intermediate files; the generated pdf/svg/png/gif are kept.
+clean:
+	rm -rf  $(TARGET) *.class *.html *.log *.aux *.data *.gnuplot
+
+# Crop the existing $(SOURCE).pdf and convert it to a looping GIF.
+# The PDF is not rebuilt here; run `make` first.
+gif:
+	pdfcrop $(SOURCE).pdf
+	convert -verbose -delay $(DELAY) -loop 0 -density $(DENSITY) $(SOURCE)-crop.pdf $(SOURCE).gif
+	$(MAKE) clean
+
+# Render $(SOURCE).png from the SVG. The svg target already builds the PDF,
+# so the document is compiled only once.
+png: svg
+	inkscape $(SOURCE).svg -w $(WIDTH) --export-png=$(SOURCE).png
+
+# Convert the existing $(SOURCE).pdf to result.gif with white made
+# transparent. The PDF is not rebuilt here; run `make` first.
+transparentGif:
+	convert $(SOURCE).pdf -transparent white result.gif
+	$(MAKE) clean
+
+# Build the PDF, then derive a plain SVG of width $(WIDTH) from it.
+# An alternative to pdf2svg that was left disabled:
+#   inkscape $(SOURCE).pdf --export-plain-svg=$(SOURCE).svg
+# The first inkscape pass is necessary because pdf2svg does not always
+# create valid SVGs. rsvg-convert then rescales into a temporary
+# $(SOURCE)2.svg, which is normalised again and deleted.
+svg: make
+	pdf2svg $(SOURCE).pdf $(SOURCE).svg
+	inkscape $(SOURCE).svg --export-plain-svg=$(SOURCE).svg
+	rsvg-convert -a -w $(WIDTH) -f svg $(SOURCE).svg -o $(SOURCE)2.svg
+	inkscape $(SOURCE)2.svg --export-plain-svg=$(SOURCE).svg
+	rm $(SOURCE)2.svg

+ 3 - 0
source-code/Pseudocode/WER-calculation/Readme.md

@@ -0,0 +1,3 @@
+Compiled example
+----------------
+![Example](WER-calculation.png)

二進制
source-code/Pseudocode/WER-calculation/WER-calculation.png


+ 50 - 0
source-code/Pseudocode/WER-calculation/WER-calculation.tex

@@ -0,0 +1,50 @@
+% Standalone rendering of the WER pseudocode. The `preview` package with
+% `active,tightpage` crops the output page to the preview environment.
+\documentclass{article}
+\usepackage[pdftex,active,tightpage]{preview}
+\setlength\PreviewBorder{2mm}
+
+% NOTE(review): the document text is English while babel loads ngerman;
+% confirm this is intended.
+\usepackage[utf8]{inputenc} % this is needed for umlauts
+\usepackage[ngerman]{babel} % this is needed for umlauts
+\usepackage[T1]{fontenc}    % this is needed for correct output of umlauts in pdf
+\usepackage{amssymb,amsmath,amsfonts} % nice math rendering
+\usepackage{braket} % needed for \Set
+\usepackage{algorithm,algpseudocode}
+
+\begin{document}
+\begin{preview}
+    \begin{algorithm}[H]
+        \begin{algorithmic}
+            % r is the reference sequence, h the hypothesis to compare.
+            \Function{WER}{Reference $r$, Hypothesis $h$}
+                % D[i][j] is the edit distance between the first i elements
+                % of r and the first j elements of h.
+                \State int[$|r|+1$][$|h|+1$] $D$ \Comment{Initialisation}
+                % Fill the first row and the first column only.
+                \For{($i=0$; $\;i \leq |r|$; $\;i$++)}
+                    \For{($j=0$; $\;j \leq |h|$; $\;j$++)}
+                        \If{$i==0$}
+                            \State $D[0][j] \gets j$
+                        \ElsIf{$j==0$}
+                            \State $D[i][0] \gets i$
+                        \EndIf
+                    \EndFor
+                \EndFor
+
+                % Empty \State: produces a blank line in the rendered listing.
+                \State
+                \For{($i=1$; $\;i \leq |r|$; $\;i$++)} \Comment{Calculation}
+                    \For{($j=1$; $\;j \leq |h|$; $\;j$++)}
+                        \If{$r[i-1] == h[j-1]$}
+                            % Matching elements cost nothing.
+                            \State $D[i][j] \gets D[i-1][j-1]$
+                        \Else
+                            % Otherwise take the cheapest edit, each costing 1.
+                            \State $sub \gets D[i-1][j-1] + 1$
+                            \State $ins \gets D[i][j-1] + 1$
+                            \State $del \gets D[i-1][j] + 1$
+                            \State $D[i][j] \gets \min(sub, ins, del)$
+                        \EndIf
+                    \EndFor
+                \EndFor
+
+                \State 
+                % The bottom-right cell is the distance between all of r and h.
+                \State \Return $D[|r|][|h|]$
+            \EndFunction
+        \end{algorithmic}
+    \caption{Calculation of WER with Levenshtein distance}
+    \label{alg:seq1}
+    \end{algorithm}
+\end{preview}
+\end{document}

+ 42 - 0
source-code/Pseudocode/WER-calculation/wer.py

@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+
+def wer(r, h):
+    """
+        Calculation of WER with Levenshtein distance.
+        Works only for iterables up to 254 elements (uint8).
+        O(nm) time ans space complexity.
+
+        >>> wer("who is there".split(), "is there".split()) 
+        1
+        >>> wer("who is there".split(), "".split()) 
+        3
+        >>> wer("".split(), "who is there".split()) 
+        3
+    """
+    # initialisation
+    import numpy
+    d = numpy.zeros((len(r)+1)*(len(h)+1), dtype=numpy.uint8)
+    d = d.reshape((len(r)+1, len(h)+1))
+    for i in range(len(r)+1):
+        for j in range(len(h)+1):
+            if i == 0:
+                d[0][j] = j
+            elif j == 0:
+                d[i][0] = i
+
+    # computation
+    for i in range(1, len(r)+1):
+        for j in range(1, len(h)+1):
+            if r[i-1] == h[j-1]:
+                d[i][j] = d[i-1][j-1]
+            else:
+                substitution = d[i-1][j-1] + 1
+                insertion    = d[i][j-1] + 1
+                deletion     = d[i-1][j] + 1
+                d[i][j] = min(substitution, insertion, deletion)
+
+    return d[len(r)][len(h)]
+
+if __name__ == "__main__":
+    import doctest
+    doctest.testmod()