hace 11 años · f3bc84df5d
--- a/presentations/Bachelor-Final-Presentation/LaTeX/Bachelor-Final-Presentation.pdf
+++ b/presentations/Bachelor-Final-Presentation/LaTeX/Bachelor-Final-Presentation.pdf
--- a/presentations/Bachelor-Final-Presentation/LaTeX/dot-reduction.png
+++ b/presentations/Bachelor-Final-Presentation/LaTeX/dot-reduction.png
--- a/presentations/Bachelor-Final-Presentation/LaTeX/interrupted-stroke.png
+++ b/presentations/Bachelor-Final-Presentation/LaTeX/interrupted-stroke.png
--- a/presentations/Bachelor-Final-Presentation/LaTeX/preprocessing.tex
+++ b/presentations/Bachelor-Final-Presentation/LaTeX/preprocessing.tex
@@ -1,5 +1,5 @@
 
				-\subsection{Preprocessing}
			
 
				-\begin{frame}{Preprocessing}
			
 
				+\subsection{Preprocessing Algorithms}
			
 
				+\begin{frame}{Preprocessing Algorithms}
			
 
				     \begin{columns}[T] % contents are top vertically aligned
			
 
				     \begin{column}[T]{5cm} % each column can also be its own environment
			
 
				         \begin{itemize}
			
@@ -14,7 +14,7 @@
 
				                 \item<4-> Smoothing (e.g. moving average)
			
 
				                 \item<5-> Dot reduction
			
 
				                 \item<6-> Filtering (by distance, speed or angle)
			
 
				-                \item<7-> Stroke connection
			
 
				+                \item<8-> Stroke connection
			
 
				             \end{itemize}
			
 
				         \end{itemize}
			
 
				     \end{column}
			
@@ -22,7 +22,10 @@
 
				         \only<2>{\includegraphics[width=6cm, keepaspectratio]{scale-and-shift.png}}
			
 
				         \only<3>{\includegraphics[width=6cm, keepaspectratio]{resampling.png}}
			
 
				         \only<4>{\includegraphics[width=6cm, keepaspectratio]{smooth-1-1-1.png}}
			
 
				-        \only<7>{\includegraphics[width=6cm, keepaspectratio]{interrupted-stroke.png}}
			
 
				+        \only<5>{\includegraphics[width=6cm, keepaspectratio]{dot-reduction.png}}
			
 
				+        \only<6>{\includegraphics[width=6cm, keepaspectratio]{wildpoint-1.png}}
			
 
				+        \only<7>{\includegraphics[width=6cm, keepaspectratio]{wildpoint-2.png}}
			
 
				+        \only<8>{\includegraphics[width=6cm, keepaspectratio]{interrupted-stroke.png}}
			
 
				     \end{column}
			
 
				     \end{columns}
			
 
				 
			
--- a/presentations/Bachelor-Final-Presentation/LaTeX/resampling.png
+++ b/presentations/Bachelor-Final-Presentation/LaTeX/resampling.png
--- a/presentations/Bachelor-Final-Presentation/LaTeX/scale-and-shift.png
+++ b/presentations/Bachelor-Final-Presentation/LaTeX/scale-and-shift.png
--- a/presentations/Bachelor-Final-Presentation/LaTeX/wildpoint-1.png
+++ b/presentations/Bachelor-Final-Presentation/LaTeX/wildpoint-1.png
--- a/presentations/Bachelor-Final-Presentation/LaTeX/wildpoint-2.png
+++ b/presentations/Bachelor-Final-Presentation/LaTeX/wildpoint-2.png
--- a/presentations/Bachelor-Final-Presentation/LaTeX/write-math.tex
+++ b/presentations/Bachelor-Final-Presentation/LaTeX/write-math.tex
@@ -1,149 +0,0 @@
 
				-\subsection{Write Math}
			
 
				-
			
 
				-\begin{frame}{write-math.com}
			
 
				-    \begin{itemize}
			
 
				-        \item a website where users can add labeled training data and unlabeled
			
 
				-              data which they want to classify. I call this data \enquote{recording}
			
 
				-        \begin{figure}[ht]
			
 
				-            \centering
			
 
				-            \subfloat{
			
 
				-                \includegraphics[height=0.1\textwidth]{../images/279952.pdf}
			
 
				-            }%
			
 
				-            \qquad
			
 
				-            \subfloat{
			
 
				-                \includegraphics[height=0.1\textwidth]{../images/281507.pdf}
			
 
				-            }%
			
 
				-            \qquad
			
 
				-            \subfloat{
			
 
				-                \includegraphics[height=0.1\textwidth]{../images/287612.pdf}
			
 
				-            }%
			
 
				-            \qquad
			
 
				-            \subfloat{
			
 
				-                \includegraphics[height=0.1\textwidth]{../images/292175.pdf}
			
 
				-            }%
			
 
				-            \caption*{4 recordings}
			
 
				-        \end{figure}
			
 
				-        \item works with desktop computers and touch devices
			
 
				-        \item symbol recognition can be done by multiple classifiers
			
 
				-        \item users can contribute formulas as recordings and as \LaTeX{} answers
			
 
				-              for recordings
			
 
				-        \item users can vote for \LaTeX{} answers:
			
 
				-              \Large $\leq$, $\leqq$, $\leqslant$, \dots \normalsize
			
 
				-        \item user who entered the recording can accept one answer
			
 
				-    \end{itemize}
			
 
				-\end{frame}
			
 
				-
			
 
				-% \framedgraphic{Classify}{../images/classify.png}
			
 
				-% \framedgraphic{Workflow}{../images/workflow.png}
			
 
				-% \framedgraphic{User page}{../images/user-page.png}
			
 
				-% \framedgraphic{Information about recordings}{../images/view.png}
			
 
				-% \framedgraphic{Symbol page}{../images/symbol.png}
			
 
				-% \framedgraphic{Training}{../images/train.png}
			
 
				-% \framedgraphic{Ranking}{../images/ranking.png}
			
 
				-
			
 
				-
			
 
				-\begin{frame}[fragile]{Statistics}
			
 
				-    \begin{itemize}
			
 
				-        \item 127 users with at least 5 recordings
			
 
				-        \item $\num{1111}$ symbols, but only $\num{369}$ used for experiments
			
 
				-        \item $\num{235831}$ recordings (e.g. $\num{3489}$ times \verb+\int+, but only 50 times \verb+X+)
			
 
				-    \end{itemize}
			
 
				-\end{frame}
			
 
				-
			
 
				-% \begin{frame}{First classification worker}
			
 
				-%     \begin{itemize}
			
 
				-%         \item preprocessing: Scale to fit into unit square while keeping the aspect
			
 
				-%               ratio
			
 
				-%         \item applies greedy time warping
			
 
				-%         \item compares a new recording with every recording
			
 
				-%               in the database
			
 
				-%         \item[$\Rightarrow$] Classification time is in $\mathcal{O}(\text{recordings})$,
			
 
				-%               but we rather would like $\mathcal{O}(\text{symbols})$
			
 
				-%         \item the current server / workflow can only handle about 4000 recordings
			
 
				-%         \item[$\Rightarrow$] Another way to classify is necessary
			
 
				-%     \end{itemize}
			
 
				-% \end{frame}
			
 
				-
			
 
				-\begin{frame}[fragile]{Handwriting Recognition Toolkit (hwrt)}
			
 
				-    \begin{itemize}
			
 
				-        \item Many preprocessing algorithms / features implemented
			
 
				-        \item Plug-in system for preprocessing algorithms / features
			
 
				-        \item Needs neural network toolkit
			
 
				-        \item Hosted at \url{https://github.com/MartinThoma/hwrt}
			
 
				-        \item Installable via \verb+pip+ (Python package installer):\\
			
 
				-              \verb+$ pip install hwrt+
			
 
				-    \end{itemize}
			
 
				-\end{frame}
			
 
				-
			
 
				-\begin{frame}[fragile]{hwrt preprocessing configuration file}
			
 
				-    \begin{verbatim}
			
 
				-data-source: raw-datasets/2014-08-26-20-14-data-raw.pickle
			
 
				-queue:
			
 
				-  - RemoveDuplicateTime: null
			
 
				-  - ScaleAndShift:
			
 
				-      - max_width: 1.0
			
 
				-      - max_height: 1.0
			
 
				-      - center: true
			
 
				-  - SpaceEvenlyPerStroke:
			
 
				-      - kind: linear
			
 
				-      - number: 20
			
 
				-    \end{verbatim}
			
 
				-\end{frame}
			
 
				-
			
 
				-\subsection{Preprocessing algorithms}
			
 
				-\begin{frame}{Preprocessing algorithms}
			
 
				-    \begin{itemize}
			
 
				-        \item Normalizing
			
 
				-        \begin{itemize}
			
 
				-            \item Scaling
			
 
				-            \item Shifting
			
 
				-            \item Resampling
			
 
				-        \end{itemize}
			
 
				-        \item Noise reduction
			
 
				-        \begin{itemize}
			
 
				-            \item Smoothing (e.g. moving average)
			
 
				-            \item Dot reduction
			
 
				-            \item Filtering (by distance, speed or angle)
			
 
				-            \item Stroke connection
			
 
				-        \end{itemize}
			
 
				-    \end{itemize}
			
 
				-\end{frame}
			
 
				-
			
 
				-\begin{frame}[fragile]{hwrt feature configuration file}
			
 
				-    \begin{verbatim}
			
 
				-data-source: preprocessed/baseline
			
 
				-data-multiplication:
			
 
				-  - Multiply:
			
 
				-      - nr: 1
			
 
				-features:
			
 
				-  - ConstantPointCoordinates:
			
 
				-      - strokes: 4
			
 
				-      - points_per_stroke: 20
			
 
				-      - fill_empty_with: 0
			
 
				-      - pen_down: false
			
 
				-    \end{verbatim}
			
 
				-\end{frame}
			
 
				-
			
 
				-\subsection{Features}
			
 
				-\begin{frame}{Features}
			
 
				-    \begin{itemize}
			
 
				-        \item Local
			
 
				-        \begin{itemize}
			
 
				-            \item Coordinates
			
 
				-            \item Speed
			
 
				-            \item Binary pen pressure
			
 
				-            \item Direction
			
 
				-            \item Curvature
			
 
				-            \item Bitmap-environment
			
 
				-            \item Hat-Feature
			
 
				-        \end{itemize}
			
 
				-        \item Global
			
 
				-        \begin{itemize}
			
 
				-            \item \# of points
			
 
				-            \item \# of strokes
			
 
				-            \item Center point
			
 
				-            \item Bitmap
			
 
				-            \item Bounding box (width, height, time)
			
 
				-        \end{itemize}
			
 
				-    \end{itemize}
			
 
				-\end{frame}