write-math.tex 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149
  1. \subsection{Write Math}
  2. \begin{frame}{write-math.com}
  3. \begin{itemize}
  4. \item a website where users can add labeled training data and unlabeled
  5. data which they want to classify. I call each such item a \enquote{recording}.
  6. \begin{figure}[ht]
  7. \centering
  8. \subfloat{
  9. \includegraphics[height=0.1\textwidth]{../images/279952.pdf}
  10. }%
  11. \qquad
  12. \subfloat{
  13. \includegraphics[height=0.1\textwidth]{../images/281507.pdf}
  14. }%
  15. \qquad
  16. \subfloat{
  17. \includegraphics[height=0.1\textwidth]{../images/287612.pdf}
  18. }%
  19. \qquad
  20. \subfloat{
  21. \includegraphics[height=0.1\textwidth]{../images/292175.pdf}
  22. }%
  23. \caption*{4 recordings}
  24. \end{figure}
  25. \item works with desktop computers and touch devices
  26. \item symbol recognition can be done by multiple classifiers
  27. \item users can contribute formulas as recordings and as \LaTeX{} answers
  28. for recordings
  29. \item users can vote for \LaTeX{} answers:
  30. \Large $\leq$, $\leqq$, $\leqslant$, \dots \normalsize
  31. \item the user who entered the recording can accept one answer
  32. \end{itemize}
  33. \end{frame}
  34. % \framedgraphic{Classify}{../images/classify.png}
  35. % \framedgraphic{Workflow}{../images/workflow.png}
  36. % \framedgraphic{User page}{../images/user-page.png}
  37. % \framedgraphic{Information about recordings}{../images/view.png}
  38. % \framedgraphic{Symbol page}{../images/symbol.png}
  39. % \framedgraphic{Training}{../images/train.png}
  40. % \framedgraphic{Ranking}{../images/ranking.png}
  41. \begin{frame}[fragile]{Statistics}
  42. \begin{itemize}
  43. \item 127 users with at least 5 recordings
  44. \item $\num{1111}$ symbols, but only $\num{369}$ used for experiments
  45. \item $\num{235831}$ recordings (e.g.\ $\num{3489}$ times \verb+\int+, but only 50 times \verb+X+)
  46. \end{itemize}
  47. \end{frame}
  48. % \begin{frame}{First classification worker}
  49. % \begin{itemize}
  50. % \item preprocessing: Scale to fit into unit square while keeping the aspect
  51. % ratio
  52. % \item applies greedy time warping
  53. % \item compares a new recording with every recording
  54. % in the database
  55. % \item[$\Rightarrow$] Classification time is in $\mathcal{O}(\text{recordings})$,
  56. % but we rather would like $\mathcal{O}(\text{symbols})$
  57. % \item the current server / workflow can only handle about 4000 recordings
  58. % \item[$\Rightarrow$] Another way to classify is necessary
  59. % \end{itemize}
  60. % \end{frame}
  61. \begin{frame}[fragile]{Handwriting Recognition Toolkit (hwrt)}
  62. \begin{itemize}
  63. \item Many preprocessing algorithms / features implemented
  64. \item Plug-in system for preprocessing algorithms / features
  65. \item Needs neural network toolkit
  66. \item Hosted at \url{https://github.com/MartinThoma/hwrt}
  67. \item Installable via \verb+pip+ (Python package installer):\\
  68. \verb+$ pip install hwrt+
  69. \end{itemize}
  70. \end{frame}
  71. \begin{frame}[fragile]{hwrt preprocessing configuration file}
  72. \begin{verbatim}
  73. data-source: raw-datasets/2014-08-26-20-14-data-raw.pickle
  74. queue:
  75.   - RemoveDuplicateTime: null
  76.   - ScaleAndShift:
  77.     - max_width: 1.0
  78.     - max_height: 1.0
  79.     - center: true
  80.   - SpaceEvenlyPerStroke:
  81.     - kind: linear
  82.     - number: 20
  83. \end{verbatim}
  84. \end{frame}
  85. \subsection{Preprocessing algorithms}
  86. \begin{frame}{Preprocessing algorithms}
  87. \begin{itemize}
  88. \item Normalizing
  89. \begin{itemize}
  90. \item Scaling
  91. \item Shifting
  92. \item Resampling
  93. \end{itemize}
  94. \item Noise reduction
  95. \begin{itemize}
  96. \item Smoothing (e.g.\ moving average)
  97. \item Dot reduction
  98. \item Filtering (by distance, speed or angle)
  99. \item Stroke connection
  100. \end{itemize}
  101. \end{itemize}
  102. \end{frame}
  103. \begin{frame}[fragile]{hwrt feature configuration file}
  104. \begin{verbatim}
  105. data-source: preprocessed/baseline
  106. data-multiplication:
  107.   - Multiply:
  108.     - nr: 1
  109. features:
  110.   - ConstantPointCoordinates:
  111.     - strokes: 4
  112.     - points_per_stroke: 20
  113.     - fill_empty_with: 0
  114.     - pen_down: false
  115. \end{verbatim}
  116. \end{frame}
  117. \subsection{Features}
  118. \begin{frame}{Features}
  119. \begin{itemize}
  120. \item Local
  121. \begin{itemize}
  122. \item Coordinates
  123. \item Speed
  124. \item Binary pen pressure
  125. \item Direction
  126. \item Curvature
  127. \item Bitmap-environment
  128. \item Hat-Feature
  129. \end{itemize}
  130. \item Global
  131. \begin{itemize}
  132. \item \# of points
  133. \item \# of strokes
  134. \item Center point
  135. \item Bitmap
  136. \item Bounding box (width, height, time)
  137. \end{itemize}
  138. \end{itemize}
  139. \end{frame}