11 tahun lalu · 3a8564348e
--- a/presentations/Bachelor-Short/LaTeX/bachelor-short.tex
+++ b/presentations/Bachelor-Short/LaTeX/bachelor-short.tex
@@ -26,13 +26,20 @@
 
				 \section{What is my Bachelor's thesis about?}
			
 
				 \input{introduction}
			
 
				 
			
 
				-\section{What did I do so far?}
			
 
				-\input{work-done}
			
 
				+\section{write-math.com}
			
 
				+\input{write-math}
			
 
				+
			
 
				+\section{Preprocessing and Features}
			
 
				+\input{preprocessing}
			
 
				+\input{features}
			
 
				+
			
 
				+\section{Neural Nets}
			
 
				+\input{neural-nets}
			
 
				 
			
 
				 \section{What will I do next?}
			
 
				 \input{will-do}
			
 
				 
			
 
				-\section{End}
			
 
				+\section*{End}
			
 
				 \subsection{End}
			
 
				 \input{sources}
			
 
				 \framedgraphic{Thanks for Your Attention!}{../images/xi.png}
			
--- a/presentations/Bachelor-Short/LaTeX/features.tex
+++ b/presentations/Bachelor-Short/LaTeX/features.tex
@@ -0,0 +1,23 @@
 
				+\subsection{Features}
			
 
				+\begin{frame}{Features}
			
 
				+    \begin{itemize}
			
 
				+        \item Local
			
 
				+        \begin{itemize}
			
 
				+            \item Coordinates
			
 
				+            \item Speed
			
 
				+            \item Binary pen pressure
			
 
				+            \item Direction
			
 
				+            \item Curvature
			
 
				+            \item Bitmap-environment
			
 
				+            \item Hat-Feature
			
 
				+        \end{itemize}
			
 
				+        \item Global
			
 
				+        \begin{itemize}
			
 
				+            \item \# of points
			
 
				+            \item \# of strokes
			
 
				+            \item Center point
			
 
				+            \item Bitmap
			
 
				+            \item Bounding box (width, height, time)
			
 
				+        \end{itemize}
			
 
				+    \end{itemize}
			
 
				+\end{frame}
			
--- a/presentations/Bachelor-Short/LaTeX/introduction.tex
+++ b/presentations/Bachelor-Short/LaTeX/introduction.tex
@@ -2,9 +2,9 @@
 
				 
			
 
				 \begin{frame}{What is my Bachelor's thesis about?}
			
 
				     \begin{itemize}
			
 
				-        \item Recognition of handwritten mathematical formulas
			
 
				+        \item Recognition of handwritten mathematical symbols
			
 
				         \item On-line recognition, not OCR!
			
 
				-        \item Given a series of points $(x(t), y(t), b)$\\
			
 
				+        \item Given a series of points $(x(t), y(t), b(t))$\\
			
 
				               I want to get the proper \LaTeX{} code.
			
 
				     \end{itemize}
			
 
				 \end{frame}
			
@@ -16,13 +16,13 @@
 
				         \item It's much harder to find complete formulas.
			
 
				     \end{itemize}
			
 
				 
			
 
				-    I want to
			
 
				-    \begin{itemize}
			
 
				-        \item provide a tool that enables beginners to get the best \LaTeX{} code
			
 
				-              for their formula,
			
 
				-        \item find out what works best for symbol recognition
			
 
				-        \item and provide data and a platform to test new ideas for classifiers
			
 
				-    \end{itemize}
			
 
				+    % I want to
			
 
				+    % \begin{itemize}
			
 
				+    %     \item provide a tool that enables beginners to get the best \LaTeX{} code
			
 
				+    %           for their formula,
			
 
				+    %     \item find out what works best for symbol recognition
			
 
				+    %     \item and provide data and a platform to test new ideas for classifiers
			
 
				+    % \end{itemize}
			
 
				 
			
 
				     As soon as symbol recognition works good in terms of classification time and
			
 
				     performance, I will continue with formula recognition.
			
--- a/presentations/Bachelor-Short/LaTeX/neural-nets.tex
+++ b/presentations/Bachelor-Short/LaTeX/neural-nets.tex
@@ -0,0 +1,13 @@
 
				+\subsection{Neural Net experiments}
			
 
				+\begin{frame}{Experiments}
			
 
				+    \textbf{Preprocessing:} Scaling, shifting and linear interpolation\\
			
 
				+    \textbf{Features:} Coordinates of 80 points (4 Lines with 20 points each)\\
			
 
				+    \textbf{Learning:} MLP, 300 epochs, LR of 0.1
			
 
				+    \begin{itemize}
			
 
				+        \item[] \textit{topology      \tabto{6cm} error in training time}
			
 
				+        \item 160:500:369             \tabto{6cm} 30.62 \% in \hphantom{0}9min 08s
			
 
				+        \item 160:500:500:369         \tabto{6cm} 27.73 \% in 11min 49s
			
 
				+        \item 160:500:500:500:369     \tabto{6cm} 34.79 \% in 14min 09s
			
 
				+        \item 160:500:500:500:500:369 \tabto{6cm} 33.61 \% in 14min 06s
			
 
				+    \end{itemize}
			
 
				+\end{frame}
			
--- a/presentations/Bachelor-Short/LaTeX/preprocessing.tex
+++ b/presentations/Bachelor-Short/LaTeX/preprocessing.tex
@@ -0,0 +1,18 @@
 
				+\subsection{Preprocessing}
			
 
				+\begin{frame}{Preprocessing}
			
 
				+    \begin{itemize}
			
 
				+        \item Normalizing
			
 
				+        \begin{itemize}
			
 
				+            \item Scaling
			
 
				+            \item Shifting
			
 
				+            \item Resampling
			
 
				+        \end{itemize}
			
 
				+        \item Noise reduction
			
 
				+        \begin{itemize}
			
 
				+            \item Smoothing (e.g. moving average)
			
 
				+            \item Dot reduction
			
 
				+            \item Filtering (by distance, speed or angle)
			
 
				+            \item Stroke connection
			
 
				+        \end{itemize}
			
 
				+    \end{itemize}
			
 
				+\end{frame}
			
--- a/presentations/Bachelor-Short/LaTeX/will-do.tex
+++ b/presentations/Bachelor-Short/LaTeX/will-do.tex
@@ -1,32 +1,22 @@
 
				 \subsection{What will I do next?}
			
 
				 \begin{frame}{What will I do next?}
			
 
				     \begin{itemize}
			
 
				-        \item Get classification performance with cross-validation
			
 
				-        \item Implement neural net for classification
			
 
				-        \begin{itemize}
			
 
				-            \item preprocessing: compute cubic spline for each line
			
 
				-            \begin{itemize}
			
 
				-                \item equi-spaced points or
			
 
				-                \item get equi-timed points
			
 
				-            \end{itemize}
			
 
				-            \item 5 - 20 input neurons for each line
			
 
				-            \item 1076 output neurons (one for each symbol)
			
 
				-        \end{itemize}
			
 
				-        \item Get a language model (e.g. by parsing Wikipedia)
			
 
				-        \item Use ANN with HMM (?)
			
 
				+        \item Evaluate preprocessing steps
			
 
				+        \item Try other features
			
 
				+        \item Try other topologies / trainings (e.g. newbob)
			
 
				     \end{itemize}
			
 
				 \end{frame}
			
 
				 
			
 
				-\subsection{Far future}
			
 
				-\begin{frame}{What could be done?}
			
 
				-    \begin{itemize}
			
 
				-        \item Make use of audio data in a multimodal approach\\
			
 
				-              e.g. $R$ and $\mathcal{R}$
			
 
				-        \item Currently, the Lecture Translation system doesn't recognize math.\\
			
 
				-              You get \enquote{integral of e raised to the power of x d x} instead
			
 
				-              of $\int e^x \mathrm{d} x$.
			
 
				-        \item Spoken math is ambigous: $\sqrt{a+b}$ vs. $\sqrt{a} + b$
			
 
				-        \item The language model I create could help to find probable formulas
			
 
				-        \item The platform could be used to get more input data of users
			
 
				-    \end{itemize}
			
 
				-\end{frame}
			
 
				+% \subsection{Far future}
			
 
				+% \begin{frame}{What could be done?}
			
 
				+%     \begin{itemize}
			
 
				+%         \item Make use of audio data in a multimodal approach\\
			
 
				+%               e.g. $R$ and $\mathcal{R}$
			
 
				+%         \item Currently, the Lecture Translation system doesn't recognize math.\\
			
 
				+%               You get \enquote{integral of e raised to the power of x d x} instead
			
 
				+%               of $\int e^x \mathrm{d} x$.
			
 
				+%         \item Spoken math is ambiguous: $\sqrt{a+b}$ vs. $\sqrt{a} + b$
			
 
				+%         \item The language model I create could help to find probable formulas
			
 
				+%         \item The platform could be used to get more input data of users
			
 
				+%     \end{itemize}
			
 
				+% \end{frame}
			
--- a/presentations/Bachelor-Short/LaTeX/work-done.tex
+++ b/presentations/Bachelor-Short/LaTeX/work-done.tex
@@ -13,25 +13,18 @@
 
				 
			
 
				 \framedgraphic{Classify}{../images/classify.png}
			
 
				 \framedgraphic{Workflow}{../images/workflow.png}
			
 
				-\framedgraphic{User page}{../images/user-page.png}
			
 
				-\framedgraphic{Information about handwritten-data}{../images/view.png}
			
 
				-\framedgraphic{Non-mathematical symbols}{../images/yinyang.png}
			
 
				-\framedgraphic{Training}{../images/train.png}
			
 
				+% \framedgraphic{User page}{../images/user-page.png}
			
 
				+% \framedgraphic{Information about recordings}{../images/view.png}
			
 
				+% \framedgraphic{Symbol page}{../images/symbol.png}
			
 
				+% \framedgraphic{Training}{../images/train.png}
			
 
				 \framedgraphic{Ranking}{../images/ranking.png}
			
 
				-\framedgraphic{Symbol page}{../images/symbol.png}
			
 
				 
			
 
				-\begin{frame}{Statistics}
			
 
				+
			
 
				+\begin{frame}[fragile]{Statistics}
			
 
				     \begin{itemize}
			
 
				-        \item 40 users
			
 
				-        \item 1076 symbols
			
 
				-        \item 5519 handwritten symbols (e.g. 195 times the letter \enquote{A})
			
 
				-        \begin{itemize}
			
 
				-            \item only 264 have 4 lines
			
 
				-            \item only 36 have 5 lines
			
 
				-            \item only 16 have 6 lines
			
 
				-            \item only 19 have 7 lines or more
			
 
				-            \item none has more than 12 lines
			
 
				-        \end{itemize}
			
 
				+        \item 127 users with at least 5 recordings
			
 
				+        \item 1109 symbols, but only 369 used for experiments
			
 
				+        \item $\num{235831}$ recordings (e.g. $\num{3486}$ times \verb+\int+)
			
 
				     \end{itemize}
			
 
				 \end{frame}
			
 
				 
			
@@ -40,12 +33,11 @@
 
				         \item preprocessing: Scale to fit into unit square while keeping the aspect
			
 
				               ratio
			
 
				         \item applies dynamic time warping
			
 
				-        \item compares a new handwritten symbol with every handwritten symbol
			
 
				+        \item compares a new recording with every recording
			
 
				               in the database
			
 
				-        \item[$\Rightarrow$] Classification time is in $\mathcal{O}(\text{handwritten symbols})$,
			
 
				+        \item[$\Rightarrow$] Classification time is in $\mathcal{O}(\text{recordings})$,
			
 
				               but we rather would like $\mathcal{O}(\text{symbols})$
			
 
				-        \item the current server / workflow can only handle about 4000 handwritten
			
 
				-              symbols
			
 
				+        \item the current server / workflow can only handle about 4000 recordings
			
 
				         \item[$\Rightarrow$] Another way to classify is necessary
			
 
				     \end{itemize}
			
 
				 \end{frame}
			
--- a/presentations/Bachelor-Short/images/ranking.png
+++ b/presentations/Bachelor-Short/images/ranking.png
--- a/presentations/Bachelor-Short/templates/myStyle.sty
+++ b/presentations/Bachelor-Short/templates/myStyle.sty
@@ -4,9 +4,11 @@
 
				 \InputIfFileExists{../templates/beamerthemekit.sty}{\usepackage{../templates/beamerthemekit}}{\usetheme{Frankfurt}}
			
 
				 \usefonttheme{professionalfonts}
			
 
				 
			
 
				+\usepackage{tabto}
			
 
				 \usepackage{hyperref}
			
 
				 \usepackage{lmodern}
			
 
				 \usepackage{listings}
			
 
				+\usepackage{siunitx}
			
 
				 \usepackage{wrapfig}        % see http://en.wikibooks.org/wiki/LaTeX/Floats,_Figures_and_Captions
			
 
				 \usepackage[utf8]{inputenc} % this is needed for german umlauts
			
 
				 \usepackage[english]{babel} % this is needed for german umlauts