@@ -7,17 +7,17 @@
\centering
\hspace*{-1cm}\begin{tabular}{lllll}
\toprule
- Name & Function $\varphi(x)$ & Range of Values & $\varphi'(x)$ \\\midrule % & Used by
- Sign function$^\dagger$ & $\begin{cases}+1 &\text{if } x \geq 0\\-1 &\text{if } x < 0\end{cases}$ & $\Set{-1,1}$ & $0$ \\%& \cite{971754} \\
- \parbox[t]{2.6cm}{Heaviside\\step function$^\dagger$} & $\begin{cases}+1 &\text{if } x > 0\\0 &\text{if } x < 0\end{cases}$ & $\Set{0, 1}$ & $0$ \\%& \cite{mcculloch1943logical}\\
- Logistic function & $\frac{1}{1+e^{-x}}$ & $[0, 1]$ & $\frac{e^x}{(e^x +1)^2}$ \\%& \cite{duch1999survey} \\
- Tanh & $\frac{e^x - e^{-x}}{e^x + e^{-x}} = \tanh(x)$ & $[-1, 1]$ & $\sech^2(x)$ \\%& \cite{LeNet-5,Thoma:2014}\\
- \gls{ReLU}$^\dagger$ & $\max(0, x)$ & $[0, +\infty)$ & $\begin{cases}1 &\text{if } x > 0\\0 &\text{if } x < 0\end{cases}$ \\%& \cite{AlexNet-2012}\\
- \parbox[t]{2.6cm}{\gls{LReLU}$^\dagger$\footnotemark\\(\gls{PReLU})} & $\varphi(x) = \max(\alpha x, x)$ & $(-\infty, +\infty)$ & $\begin{cases}1 &\text{if } x > 0\\\alpha &\text{if } x < 0\end{cases}$ \\%& \cite{maas2013rectifier,he2015delving} \\
- Softplus & $\log(e^x + 1)$ & $(0, +\infty)$ & $\frac{e^x}{e^x + 1}$ \\%& \cite{dugas2001incorporating,glorot2011deep} \\
- \gls{ELU} & $\begin{cases}x &\text{if } x > 0\\\alpha (e^x - 1) &\text{if } x \leq 0\end{cases}$ & $(-\infty, +\infty)$ & $\begin{cases}1 &\text{if } x > 0\\\alpha e^x &\text{otherwise}\end{cases}$ \\%& \cite{clevert2015fast} \\
- Softmax$^\ddagger$ & $o(\mathbf{x})_j = \frac{e^{x_j}}{\sum_{k=1}^K e^{x_k}}$ & $[0, 1]^K$ & $o(\mathbf{x})_j \cdot \frac{\sum_{k=1}^K e^{x_k} - e^{x_j}}{\sum_{k=1}^K e^{x_k}}$ \\%& \cite{AlexNet-2012,Thoma:2014}\\
- Maxout$^\ddagger$ & $o(\mathbf{x}) = \max_{x \in \mathbf{x}} x$ & $(-\infty, +\infty)$ & $\begin{cases}1 &\text{if } x_i = \max \mathbf{x}\\0 &\text{otherwise}\end{cases}$ \\%& \cite{goodfellow2013maxout} \\
+ Name & Function $\varphi(x)$ & Range of Values & $\varphi'(x)$ & Used by \\\midrule %
+ Sign function$^\dagger$ & $\begin{cases}+1 &\text{if } x \geq 0\\-1 &\text{if } x < 0\end{cases}$ & $\Set{-1,1}$ & $0$ & \cite{971754} \\
+ \parbox[t]{2.6cm}{Heaviside\\step function$^\dagger$} & $\begin{cases}+1 &\text{if } x > 0\\0 &\text{if } x < 0\end{cases}$ & $\Set{0, 1}$ & $0$ & \cite{mcculloch1943logical}\\
+ Logistic function & $\frac{1}{1+e^{-x}}$ & $[0, 1]$ & $\frac{e^x}{(e^x +1)^2}$ & \cite{duch1999survey} \\
+ Tanh & $\frac{e^x - e^{-x}}{e^x + e^{-x}} = \tanh(x)$ & $[-1, 1]$ & $\sech^2(x)$ & \cite{LeNet-5,Thoma:2014}\\
+ \gls{ReLU}$^\dagger$ & $\max(0, x)$ & $[0, +\infty)$ & $\begin{cases}1 &\text{if } x > 0\\0 &\text{if } x < 0\end{cases}$ & \cite{AlexNet-2012}\\
+ \parbox[t]{2.6cm}{\gls{LReLU}$^\dagger$\footnotemark\\(\gls{PReLU})} & $\varphi(x) = \max(\alpha x, x)$ & $(-\infty, +\infty)$ & $\begin{cases}1 &\text{if } x > 0\\\alpha &\text{if } x < 0\end{cases}$ & \cite{maas2013rectifier,he2015delving} \\
+ Softplus & $\log(e^x + 1)$ & $(0, +\infty)$ & $\frac{e^x}{e^x + 1}$ & \cite{dugas2001incorporating,glorot2011deep} \\
+ \gls{ELU} & $\begin{cases}x &\text{if } x > 0\\\alpha (e^x - 1) &\text{if } x \leq 0\end{cases}$ & $(-\alpha, +\infty)$ & $\begin{cases}1 &\text{if } x > 0\\\alpha e^x &\text{otherwise}\end{cases}$ & \cite{clevert2015fast} \\
+ Softmax$^\ddagger$ & $o(\mathbf{x})_j = \frac{e^{x_j}}{\sum_{k=1}^K e^{x_k}}$ & $[0, 1]^K$ & $o(\mathbf{x})_j \cdot \frac{\sum_{k=1}^K e^{x_k} - e^{x_j}}{\sum_{k=1}^K e^{x_k}}$ & \cite{AlexNet-2012,Thoma:2014}\\
+ Maxout$^\ddagger$ & $o(\mathbf{x}) = \max_{x \in \mathbf{x}} x$ & $(-\infty, +\infty)$ & $\begin{cases}1 &\text{if } x_i = \max \mathbf{x}\\0 &\text{otherwise}\end{cases}$ & \cite{goodfellow2013maxout} \\
\bottomrule
\end{tabular}
\caption[Activation functions]{Overview of activation functions. Functions
@@ -63,13 +63,11 @@
\end{tabular}
\caption[Activation function evaluation results on CIFAR-100]{Training and
test accuracy of adjusted baseline models trained with different
- activation functions on CIFAR-100. For LReLU, $\alpha = 0.3$ was
+ activation functions on CIFAR-100. For \gls{LReLU}, $\alpha = 0.3$ was
chosen.}
\label{table:CIFAR-100-accuracies-activation-functions}
\end{table}
-\glsreset{LReLU}
-
\begin{table}[H]
\centering
\setlength\tabcolsep{1.5pt}
@@ -91,7 +89,7 @@
\end{tabular}
\caption[Activation function evaluation results on HASYv2]{Test accuracy of
adjusted baseline models trained with different activation
- functions on HASYv2. For LReLU, $\alpha = 0.3$ was chosen.}
+ functions on HASYv2. For \gls{LReLU}, $\alpha = 0.3$ was chosen.}
\label{table:HASYv2-accuracies-activation-functions}
\end{table}
@@ -116,8 +114,93 @@
\end{tabular}
\caption[Activation function evaluation results on STL-10]{Test accuracy of
adjusted baseline models trained with different activation
- functions on STL-10. For LReLU, $\alpha = 0.3$ was chosen.}
+ functions on STL-10. For \gls{LReLU}, $\alpha = 0.3$ was chosen.}
\label{table:STL-10-accuracies-activation-functions}
\end{table}
+\begin{table}[H]
+ \centering
+ \hspace*{-1cm}\begin{tabular}{lllll}
+ \toprule
+ Name & Function $\varphi(x)$ & Range of Values & $\varphi'(x)$ \\\midrule % & Used by
+ Sign function$^\dagger$ & $\begin{cases}+1 &\text{if } x \geq 0\\-1 &\text{if } x < 0\end{cases}$ & $\Set{-1,1}$ & $0$ \\%& \cite{971754} \\
+ \parbox[t]{2.6cm}{Heaviside\\step function$^\dagger$} & $\begin{cases}+1 &\text{if } x > 0\\0 &\text{if } x < 0\end{cases}$ & $\Set{0, 1}$ & $0$ \\%& \cite{mcculloch1943logical}\\
+ Logistic function & $\frac{1}{1+e^{-x}}$ & $[0, 1]$ & $\frac{e^x}{(e^x +1)^2}$ \\%& \cite{duch1999survey} \\
+ Tanh & $\frac{e^x - e^{-x}}{e^x + e^{-x}} = \tanh(x)$ & $[-1, 1]$ & $\sech^2(x)$ \\%& \cite{LeNet-5,Thoma:2014}\\
+ \gls{ReLU}$^\dagger$ & $\max(0, x)$ & $[0, +\infty)$ & $\begin{cases}1 &\text{if } x > 0\\0 &\text{if } x < 0\end{cases}$ \\%& \cite{AlexNet-2012}\\
+ \parbox[t]{2.6cm}{\gls{LReLU}$^\dagger$\footnotemark\\(\gls{PReLU})} & $\varphi(x) = \max(\alpha x, x)$ & $(-\infty, +\infty)$ & $\begin{cases}1 &\text{if } x > 0\\\alpha &\text{if } x < 0\end{cases}$ \\%& \cite{maas2013rectifier,he2015delving} \\
+ Softplus & $\log(e^x + 1)$ & $(0, +\infty)$ & $\frac{e^x}{e^x + 1}$ \\%& \cite{dugas2001incorporating,glorot2011deep} \\
+            \gls{ELU} & $\begin{cases}x &\text{if } x > 0\\\alpha (e^x - 1) &\text{if } x \leq 0\end{cases}$ & $(-\alpha, +\infty)$ & $\begin{cases}1 &\text{if } x > 0\\\alpha e^x &\text{otherwise}\end{cases}$ \\%& \cite{clevert2015fast} \\
+ Softmax$^\ddagger$ & $o(\mathbf{x})_j = \frac{e^{x_j}}{\sum_{k=1}^K e^{x_k}}$ & $[0, 1]^K$ & $o(\mathbf{x})_j \cdot \frac{\sum_{k=1}^K e^{x_k} - e^{x_j}}{\sum_{k=1}^K e^{x_k}}$ \\%& \cite{AlexNet-2012,Thoma:2014}\\
+ Maxout$^\ddagger$ & $o(\mathbf{x}) = \max_{x \in \mathbf{x}} x$ & $(-\infty, +\infty)$ & $\begin{cases}1 &\text{if } x_i = \max \mathbf{x}\\0 &\text{otherwise}\end{cases}$ \\%& \cite{goodfellow2013maxout} \\
+ \bottomrule
+ \end{tabular}
+ \caption[Activation functions]{Overview of activation functions. Functions
+ marked with $\dagger$ are not differentiable at 0 and functions
+ marked with $\ddagger$ operate on all elements of a layer
+                    simultaneously. The hyperparameter $\alpha \in (0, 1)$ of
+                    Leaky ReLU is typically $\alpha = 0.01$; for ELU, $\alpha = 1$
+                    is a common choice. Other activation functions such as
+                    randomized leaky ReLUs exist~\cite{xu2015empirical}, but are
+                    far less commonly used.\\
+                    Some functions are smoothed versions of others, like the
+                    logistic function for the Heaviside step function, tanh for
+                    the sign function, and softplus for ReLU.\\
+ Softmax is the standard activation function for the last layer of
+ a classification network as it produces a probability
+ distribution. See \Cref{fig:activation-functions-plot} for a plot
+ of some of them.}
+ \label{table:activation-functions-overview}
+\end{table}
+\footnotetext{$\alpha$ is a hyperparameter in leaky ReLU, but a learnable parameter in the parametric ReLU function.}
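+
+For reference, the entries of the softmax output are positive and sum to one,
+$\sum_{j=1}^K o(\mathbf{x})_j = 1$, which is why the output can be interpreted
+as a probability distribution. The derivative listed for softmax in
+\Cref{table:activation-functions-overview} can be read as the diagonal entry of
+its Jacobian; with $o(\mathbf{x})_j = \frac{e^{x_j}}{\sum_{k=1}^K e^{x_k}}$, the
+quotient rule gives
+\[
+    \frac{\partial o(\mathbf{x})_j}{\partial x_j}
+    = \frac{e^{x_j} \sum_{k=1}^K e^{x_k} - e^{x_j} e^{x_j}}{\left( \sum_{k=1}^K e^{x_k} \right)^2}
+    = o(\mathbf{x})_j \cdot \frac{\sum_{k=1}^K e^{x_k} - e^{x_j}}{\sum_{k=1}^K e^{x_k}}
+    = o(\mathbf{x})_j \left( 1 - o(\mathbf{x})_j \right).
+\]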
+
+\begin{figure}[ht]
+ \centering
+ \begin{tikzpicture}
+ \definecolor{color1}{HTML}{E66101}
+ \definecolor{color2}{HTML}{FDB863}
+ \definecolor{color3}{HTML}{B2ABD2}
+ \definecolor{color4}{HTML}{5E3C99}
+ \begin{axis}[
+ legend pos=north west,
+ legend cell align={left},
+ axis x line=middle,
+ axis y line=middle,
+ x tick label style={/pgf/number format/fixed,
+ /pgf/number format/fixed zerofill,
+ /pgf/number format/precision=1},
+ y tick label style={/pgf/number format/fixed,
+ /pgf/number format/fixed zerofill,
+ /pgf/number format/precision=1},
+ grid = major,
+ width=16cm,
+ height=8cm,
+ grid style={dashed, gray!30},
+ xmin=-2, % start the diagram at this x-coordinate
+ xmax= 2, % end the diagram at this x-coordinate
+ ymin=-1, % start the diagram at this y-coordinate
+ ymax= 2, % end the diagram at this y-coordinate
+ xlabel=x,
+ ylabel=y,
+ tick align=outside,
+ enlargelimits=false]
+ \addplot[domain=-2:2, color1, ultra thick,samples=500] {1/(1+exp(-x))};
+ \addplot[domain=-2:2, color2, ultra thick,samples=500] {tanh(x)};
+ \addplot[domain=-2:2, color4, ultra thick,samples=500] {max(0, x)};
+ \addplot[domain=-2:2, color4, ultra thick,samples=500, dashed] {ln(exp(x) + 1)};
+            \addplot[domain=-2:2, color3, ultra thick,samples=500, dotted] {max(0, x) + min(0, exp(x) - 1)}; % ELU with alpha = 1: x for x > 0, e^x - 1 otherwise
+ \addlegendentry{$\varphi_1(x)=\frac{1}{1+e^{-x}}$}
+ \addlegendentry{$\varphi_2(x)=\tanh(x)$}
+ \addlegendentry{$\varphi_3(x)=\max(0, x)$}
+ \addlegendentry{$\varphi_4(x)=\log(e^x + 1)$}
+            \addlegendentry{$\varphi_5(x)=\max(0, x) + \min(0, e^x - 1)$}
+ \end{axis}
+ \end{tikzpicture}
+ \caption[Activation functions]{Activation functions plotted in $[-2, +2]$.
+                 $\tanh$ and ELU are able to produce negative numbers. The image of
+                 ELU, ReLU, and Softplus is not bounded on the positive side, whereas
+ $\tanh$ and the logistic function are always below~1.}
+ \label{fig:activation-functions-plot}
+\end{figure}
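+
+The smoothing relations mentioned in \Cref{table:activation-functions-overview}
+are also visible in \Cref{fig:activation-functions-plot}: softplus can be seen
+as a smooth approximation of \gls{ReLU}, and its derivative
+\[
+    \frac{\mathrm{d}}{\mathrm{d}x} \log(e^x + 1) = \frac{e^x}{e^x + 1} = \frac{1}{1 + e^{-x}}
+\]
+is exactly the logistic function.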
+
+\glsreset{LReLU}
\twocolumn
|