| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149 |
- \subsection{Write Math}
- \begin{frame}{write-math.com}
- \begin{itemize}
- \item a website where users can add labeled training data and unlabeled
- data which they want to classify. I call each such data item a \enquote{recording}.
- \begin{figure}[ht]
- \centering
- \subfloat{
- \includegraphics[height=0.1\textwidth]{../images/279952.pdf}
- }%
- \qquad
- \subfloat{
- \includegraphics[height=0.1\textwidth]{../images/281507.pdf}
- }%
- \qquad
- \subfloat{
- \includegraphics[height=0.1\textwidth]{../images/287612.pdf}
- }%
- \qquad
- \subfloat{
- \includegraphics[height=0.1\textwidth]{../images/292175.pdf}
- }%
- \caption*{4 recordings}
- \end{figure}
- \item works with desktop computers and touch devices
- \item symbol recognition can be done by multiple classifiers
- \item users can contribute formulas as recordings and as \LaTeX{} answers
- for recordings
- \item users can vote for \LaTeX{} answers:
- \Large $\leq$, $\leqq$, $\leqslant$, \dots \normalsize
- \item the user who entered the recording can accept one answer
- \end{itemize}
- \end{frame}
- % \framedgraphic{Classify}{../images/classify.png}
- % \framedgraphic{Workflow}{../images/workflow.png}
- % \framedgraphic{User page}{../images/user-page.png}
- % \framedgraphic{Information about recordings}{../images/view.png}
- % \framedgraphic{Symbol page}{../images/symbol.png}
- % \framedgraphic{Training}{../images/train.png}
- % \framedgraphic{Ranking}{../images/ranking.png}
- \begin{frame}[fragile]{Statistics}
- \begin{itemize}
- \item 127 users with at least 5 recordings
- \item $\num{1111}$ symbols, but only $\num{369}$ used for experiments
- \item $\num{235831}$ recordings (e.g.\ $\num{3489}$ times \verb+\int+, but only 50 times \verb+X+)
- \end{itemize}
- \end{frame}
- % \begin{frame}{First classification worker}
- % \begin{itemize}
- % \item preprocessing: Scale to fit into unit square while keeping the aspect
- % ratio
- % \item applies greedy time warping
- % \item compares a new recording with every recording
- % in the database
- % \item[$\Rightarrow$] Classification time is in $\mathcal{O}(\text{recordings})$,
- % but we rather would like $\mathcal{O}(\text{symbols})$
- % \item the current server / workflow can only handle about 4000 recordings
- % \item[$\Rightarrow$] Another way to classify is necessary
- % \end{itemize}
- % \end{frame}
- \begin{frame}[fragile]{Handwriting Recognition Toolkit (hwrt)}
- \begin{itemize}
- \item Many preprocessing algorithms / features implemented
- \item Plug-in system for preprocessing algorithms / features
- \item Needs neural network toolkit
- \item Hosted at \url{https://github.com/MartinThoma/hwrt}
- \item Installable via \verb+pip+ (Python package installer):\\
- \verb+$ pip install hwrt+
- \end{itemize}
- \end{frame}
- \begin{frame}[fragile]{hwrt preprocessing configuration file}
- \begin{verbatim}
- data-source: raw-datasets/2014-08-26-20-14-data-raw.pickle
- queue:
- - RemoveDuplicateTime: null
- - ScaleAndShift:
- - max_width: 1.0
- - max_height: 1.0
- - center: true
- - SpaceEvenlyPerStroke:
- - kind: linear
- - number: 20
- \end{verbatim}
- \end{frame}
- \subsection{Preprocessing algorithms}
- \begin{frame}{Preprocessing algorithms}
- \begin{itemize}
- \item Normalizing
- \begin{itemize}
- \item Scaling
- \item Shifting
- \item Resampling
- \end{itemize}
- \item Noise reduction
- \begin{itemize}
- \item Smoothing (e.g.\ moving average)
- \item Dot reduction
- \item Filtering (by distance, speed or angle)
- \item Stroke connection
- \end{itemize}
- \end{itemize}
- \end{frame}
- \begin{frame}[fragile]{hwrt feature configuration file}
- \begin{verbatim}
- data-source: preprocessed/baseline
- data-multiplication:
- - Multiply:
- - nr: 1
- features:
- - ConstantPointCoordinates:
- - strokes: 4
- - points_per_stroke: 20
- - fill_empty_with: 0
- - pen_down: false
- \end{verbatim}
- \end{frame}
- \subsection{Features}
- \begin{frame}{Features}
- \begin{itemize}
- \item Local
- \begin{itemize}
- \item Coordinates
- \item Speed
- \item Binary pen pressure
- \item Direction
- \item Curvature
- \item Bitmap-environment
- \item Hat-Feature
- \end{itemize}
- \item Global
- \begin{itemize}
- \item \# of points
- \item \# of strokes
- \item Center point
- \item Bitmap
- \item Bounding box (width, height, time)
- \end{itemize}
- \end{itemize}
- \end{frame}
|