Submit changes before installing new OS

Martin Thoma 7 years ago
parent commit e03eaa277b

+ 17 - 17
documents/cv-curriculum-vitae/cv-curriculum-vitae.tex

@@ -164,22 +164,6 @@ and a big, but algorithmically not challenging project. To be honest,
 I only fixed some Java bugs.}\\
 
 %----------------------------------------------------------------------------------------
-% WORK EXPERIENCE -2-
-
-{\raggedleft\textsc{2011}\par}
-
-{\raggedright\large Student research assistant at \textsc{ Institute of Toxicology and Genetics}, KIT\\
-\textit{participating in a university research project}\\[5pt]}
-
-\normalsize{In summer 2011 I worked for over a month for a
-research project at KIT. I have written bash scripts for file
-conversions, fixed some bugs and re-written a slow Mathematica script
-in a much faster Python version. But it quickly turned out that
-this project had a lot of C++ source which was rarely commented or
-documented. I realized, that I wouldn't have time for this project
-after beginning my studies at university.}\\
-
-%----------------------------------------------------------------------------------------
 % WORK EXPERIENCE -4-
 
 %{\raggedleft\textsc{2010}\par}
@@ -208,7 +192,7 @@ after beginning my studies at university.}\\
 
 
 \colorbox{shade}{\textcolor{text1}{
 \begin{tabular}{c|p{7cm}}
-\raisebox{-4pt}{\textifsymbol{18}} & Parkstraße 17, 76131 Karlsruhe \\ % Address
+\raisebox{-4pt}{\textifsymbol{18}} & Alte Allee 107, 81245 Munich \\ % Address
 \raisebox{-3pt}{\Mobilefone} & +49 $($1636$)$ 28 04 91 \\ % Phone number
 \raisebox{-1pt}{\Letter} & \href{mailto:info@martin-thoma.de}{info@martin-thoma.de} \\ % Email address
 \Keyboard & \href{http://martin-thoma.com}{martin-thoma.com} \\ % Website
@@ -332,6 +316,22 @@ Good Knowledge          & \textsc{Python}\\ \\
 
 
 \section{Work Experience}
 %----------------------------------------------------------------------------------------
+% WORK EXPERIENCE -2-
+
+{\raggedleft\textsc{2011}\par}
+
+{\raggedright\large Student research assistant at \textsc{ Institute of Toxicology and Genetics}, KIT\\
+\textit{participating in a university research project}\\[5pt]}
+
+\normalsize{In summer 2011 I worked for over a month for a
+research project at KIT. I have written bash scripts for file
+conversions, fixed some bugs and re-written a slow Mathematica script
+in a much faster Python version. But it quickly turned out that
+this project had a lot of C++ source which was rarely commented or
+documented. I realized that I wouldn't have time for this project
+after beginning my studies at university.}\\
+
+%----------------------------------------------------------------------------------------
 % WORK EXPERIENCE -3-
 
 {\raggedleft\textsc{since 2011}\par}

BIN
documents/math-minimal-distance-to-cubic-function/math-minimal-distance-to-cubic-function.pdf


+ 4 - 3
publications/activation-functions/abstract.tex

@@ -1,7 +1,8 @@
 \begin{abstract}
 This paper reviews the most common activation functions for convolution neural
-networks. They are evaluated on TODO dataset and possible reasons for the
-differences in their performance are given.
+networks. They are evaluated on the Asirra, GTSRB, HASYv2, STL-10, CIFAR-10,
+CIFAR-100 and MNIST datasets. Possible reasons for the differences in their
+performance are given.
 
 
-New state of the art results are achieved for TODO.
+New state of the art results are achieved for Asirra, GTSRB, HASYv2 and STL-10.
 \end{abstract}

+ 99 - 16
publications/activation-functions/appendix.tex

@@ -7,17 +7,17 @@
     \centering
     \hspace*{-1cm}\begin{tabular}{lllll}
     \toprule
-    Name                     & Function $\varphi(x)$ & Range of Values & $\varphi'(x)$ \\\midrule % & Used by 
-    Sign function$^\dagger$  & $\begin{cases}+1 &\text{if } x \geq 0\\-1 &\text{if } x < 0\end{cases}$ & $\Set{-1,1}$                              & $0$                    \\%& \cite{971754} \\
-    \parbox[t]{2.6cm}{Heaviside\\step function$^\dagger$} & $\begin{cases}+1 &\text{if } x > 0\\0 &\text{if } x < 0\end{cases}$ & $\Set{0, 1}$  & $0$                       \\%& \cite{mcculloch1943logical}\\
-    Logistic function        & $\frac{1}{1+e^{-x}}$                           & $[0, 1]$                                                        & $\frac{e^x}{(e^x +1)^2}$  \\%& \cite{duch1999survey} \\
-    Tanh                     & $\frac{e^x - e^{-x}}{e^x + e^{-x}} = \tanh(x)$ & $[-1, 1]$                                                       & $\sech^2(x)$              \\%& \cite{LeNet-5,Thoma:2014}\\
-    \gls{ReLU}$^\dagger$           & $\max(0, x)$                                   & $[0, +\infty)$                                                  & $\begin{cases}1 &\text{if } x > 0\\0 &\text{if } x < 0\end{cases}$      \\%& \cite{AlexNet-2012}\\
-    \parbox[t]{2.6cm}{\gls{LReLU}$^\dagger$\footnotemark\\(\gls{PReLU})} & $\varphi(x) = \max(\alpha x, x)$                        & $(-\infty, +\infty)$                                             & $\begin{cases}1 &\text{if } x > 0\\\alpha &\text{if } x < 0\end{cases}$ \\%& \cite{maas2013rectifier,he2015delving} \\
-    Softplus                 & $\log(e^x + 1)$                                & $(0, +\infty)$                                       & $\frac{e^x}{e^x + 1}$    \\%& \cite{dugas2001incorporating,glorot2011deep} \\
-    \gls{ELU}                & $\begin{cases}x &\text{if } x > 0\\\alpha (e^x - 1) &\text{if } x \leq 0\end{cases}$ & $(-\infty, +\infty)$ & $\begin{cases}1 &\text{if } x > 0\\\alpha e^x &\text{otherwise}\end{cases}$ \\%& \cite{clevert2015fast} \\
-    Softmax$^\ddagger$       & $o(\mathbf{x})_j = \frac{e^{x_j}}{\sum_{k=1}^K e^{x_k}}$    & $[0, 1]^K$                                           & $o(\mathbf{x})_j \cdot \frac{\sum_{k=1}^K e^{x_k} - e^{x_j}}{\sum_{k=1}^K e^{x_k}}$          \\%& \cite{AlexNet-2012,Thoma:2014}\\
-    Maxout$^\ddagger$        & $o(\mathbf{x}) = \max_{x \in \mathbf{x}} x$                 & $(-\infty, +\infty)$                                 & $\begin{cases}1 &\text{if } x_i = \max \mathbf{x}\\0 &\text{otherwise}\end{cases}$          \\%& \cite{goodfellow2013maxout}       \\
+    Name                     & Function $\varphi(x)$ & Range of Values & $\varphi'(x)$ & Used by \\\midrule % 
+    Sign function$^\dagger$  & $\begin{cases}+1 &\text{if } x \geq 0\\-1 &\text{if } x < 0\end{cases}$ & $\Set{-1,1}$                              & $0$                    & \cite{971754} \\
+    \parbox[t]{2.6cm}{Heaviside\\step function$^\dagger$} & $\begin{cases}+1 &\text{if } x > 0\\0 &\text{if } x < 0\end{cases}$ & $\Set{0, 1}$  & $0$                       & \cite{mcculloch1943logical}\\
+    Logistic function        & $\frac{1}{1+e^{-x}}$                           & $[0, 1]$                                                        & $\frac{e^x}{(e^x +1)^2}$  & \cite{duch1999survey} \\
+    Tanh                     & $\frac{e^x - e^{-x}}{e^x + e^{-x}} = \tanh(x)$ & $[-1, 1]$                                                       & $\sech^2(x)$              & \cite{LeNet-5,Thoma:2014}\\
+    \gls{ReLU}$^\dagger$           & $\max(0, x)$                                   & $[0, +\infty)$                                                  & $\begin{cases}1 &\text{if } x > 0\\0 &\text{if } x < 0\end{cases}$      & \cite{AlexNet-2012}\\
+    \parbox[t]{2.6cm}{\gls{LReLU}$^\dagger$\footnotemark\\(\gls{PReLU})} & $\varphi(x) = \max(\alpha x, x)$                        & $(-\infty, +\infty)$                                             & $\begin{cases}1 &\text{if } x > 0\\\alpha &\text{if } x < 0\end{cases}$ & \cite{maas2013rectifier,he2015delving} \\
+    Softplus                 & $\log(e^x + 1)$                                & $(0, +\infty)$                                       & $\frac{e^x}{e^x + 1}$    & \cite{dugas2001incorporating,glorot2011deep} \\
+    \gls{ELU}                & $\begin{cases}x &\text{if } x > 0\\\alpha (e^x - 1) &\text{if } x \leq 0\end{cases}$ & $(-\infty, +\infty)$ & $\begin{cases}1 &\text{if } x > 0\\\alpha e^x &\text{otherwise}\end{cases}$ & \cite{clevert2015fast} \\
+    Softmax$^\ddagger$       & $o(\mathbf{x})_j = \frac{e^{x_j}}{\sum_{k=1}^K e^{x_k}}$    & $[0, 1]^K$                                           & $o(\mathbf{x})_j \cdot \frac{\sum_{k=1}^K e^{x_k} - e^{x_j}}{\sum_{k=1}^K e^{x_k}}$         & \cite{AlexNet-2012,Thoma:2014}\\
+    Maxout$^\ddagger$        & $o(\mathbf{x}) = \max_{x \in \mathbf{x}} x$                 & $(-\infty, +\infty)$                                 & $\begin{cases}1 &\text{if } x_i = \max \mathbf{x}\\0 &\text{otherwise}\end{cases}$          & \cite{goodfellow2013maxout}       \\
     \bottomrule
     \end{tabular}
     \caption[Activation functions]{Overview of activation functions. Functions
@@ -63,13 +63,11 @@
     \end{tabular}
     \caption[Activation function evaluation results on CIFAR-100]{Training and
              test accuracy of adjusted baseline models trained with different
-             activation functions on CIFAR-100. For LReLU, $\alpha = 0.3$ was
+             activation functions on CIFAR-100. For \gls{LReLU}, $\alpha = 0.3$ was
              chosen.}
     \label{table:CIFAR-100-accuracies-activation-functions}
 \end{table}
 
-\glsreset{LReLU}
-
 \begin{table}[H]
     \centering
     \setlength\tabcolsep{1.5pt}
@@ -91,7 +89,7 @@
     \end{tabular}
     \caption[Activation function evaluation results on HASYv2]{Test accuracy of
              adjusted baseline models trained with different activation
-             functions on HASYv2. For LReLU, $\alpha = 0.3$ was chosen.}
+             functions on HASYv2. For \gls{LReLU}, $\alpha = 0.3$ was chosen.}
     \label{table:HASYv2-accuracies-activation-functions}
 \end{table}
 
@@ -116,8 +114,93 @@
     \end{tabular}
     \caption[Activation function evaluation results on STL-10]{Test accuracy of
              adjusted baseline models trained with different activation
-             functions on STL-10. For LReLU, $\alpha = 0.3$ was chosen.}
+             functions on STL-10. For \gls{LReLU}, $\alpha = 0.3$ was chosen.}
     \label{table:STL-10-accuracies-activation-functions}
 \end{table}
 
+\begin{table}[H]
+    \centering
+    \hspace*{-1cm}\begin{tabular}{lllll}
+    \toprule
+    Name                     & Function $\varphi(x)$ & Range of Values & $\varphi'(x)$ \\\midrule % & Used by 
+    Sign function$^\dagger$  & $\begin{cases}+1 &\text{if } x \geq 0\\-1 &\text{if } x < 0\end{cases}$ & $\Set{-1,1}$                              & $0$                    \\%& \cite{971754} \\
+    \parbox[t]{2.6cm}{Heaviside\\step function$^\dagger$} & $\begin{cases}+1 &\text{if } x > 0\\0 &\text{if } x < 0\end{cases}$ & $\Set{0, 1}$  & $0$                       \\%& \cite{mcculloch1943logical}\\
+    Logistic function        & $\frac{1}{1+e^{-x}}$                           & $[0, 1]$                                                        & $\frac{e^x}{(e^x +1)^2}$  \\%& \cite{duch1999survey} \\
+    Tanh                     & $\frac{e^x - e^{-x}}{e^x + e^{-x}} = \tanh(x)$ & $[-1, 1]$                                                       & $\sech^2(x)$              \\%& \cite{LeNet-5,Thoma:2014}\\
+    \gls{ReLU}$^\dagger$           & $\max(0, x)$                                   & $[0, +\infty)$                                                  & $\begin{cases}1 &\text{if } x > 0\\0 &\text{if } x < 0\end{cases}$      \\%& \cite{AlexNet-2012}\\
+    \parbox[t]{2.6cm}{\gls{LReLU}$^\dagger$\footnotemark\\(\gls{PReLU})} & $\varphi(x) = \max(\alpha x, x)$                        & $(-\infty, +\infty)$                                             & $\begin{cases}1 &\text{if } x > 0\\\alpha &\text{if } x < 0\end{cases}$ \\%& \cite{maas2013rectifier,he2015delving} \\
+    Softplus                 & $\log(e^x + 1)$                                & $(0, +\infty)$                                       & $\frac{e^x}{e^x + 1}$    \\%& \cite{dugas2001incorporating,glorot2011deep} \\
+    \gls{ELU}                & $\begin{cases}x &\text{if } x > 0\\\alpha (e^x - 1) &\text{if } x \leq 0\end{cases}$ & $(-\infty, +\infty)$ & $\begin{cases}1 &\text{if } x > 0\\\alpha e^x &\text{otherwise}\end{cases}$ \\%& \cite{clevert2015fast} \\
+    Softmax$^\ddagger$       & $o(\mathbf{x})_j = \frac{e^{x_j}}{\sum_{k=1}^K e^{x_k}}$    & $[0, 1]^K$                                           & $o(\mathbf{x})_j \cdot \frac{\sum_{k=1}^K e^{x_k} - e^{x_j}}{\sum_{k=1}^K e^{x_k}}$          \\%& \cite{AlexNet-2012,Thoma:2014}\\
+    Maxout$^\ddagger$        & $o(\mathbf{x}) = \max_{x \in \mathbf{x}} x$                 & $(-\infty, +\infty)$                                 & $\begin{cases}1 &\text{if } x_i = \max \mathbf{x}\\0 &\text{otherwise}\end{cases}$          \\%& \cite{goodfellow2013maxout}       \\
+    \bottomrule
+    \end{tabular}
+    \caption[Activation functions]{Overview of activation functions. Functions
+             marked with $\dagger$ are not differentiable at 0 and functions
+             marked with $\ddagger$ operate on all elements of a layer
+             simultaneously. The hyperparameters $\alpha \in (0, 1)$ of Leaky
+             ReLU and ELU are typically $\alpha = 0.01$. Other activation
+             functions like randomized leaky ReLUs exist~\cite{xu2015empirical},
+             but are far less commonly used.\\
+             Some functions are smoothed versions of others, like the logistic
+             function for the Heaviside step function, tanh for the sign
+             function, softplus for ReLU.\\
+             Softmax is the standard activation function for the last layer of
+             a classification network as it produces a probability
+             distribution. See \Cref{fig:activation-functions-plot} for a plot
+             of some of them.}
+    \label{table:activation-functions-overview}
+\end{table}
+\footnotetext{$\alpha$ is a hyperparameter in leaky ReLU, but a learnable parameter in the parametric ReLU function.}
+
+\begin{figure}[ht]
+    \centering
+    \begin{tikzpicture}
+        \definecolor{color1}{HTML}{E66101}
+        \definecolor{color2}{HTML}{FDB863}
+        \definecolor{color3}{HTML}{B2ABD2}
+        \definecolor{color4}{HTML}{5E3C99}
+        \begin{axis}[
+            legend pos=north west,
+            legend cell align={left},
+            axis x line=middle,
+            axis y line=middle,
+            x tick label style={/pgf/number format/fixed,
+                                /pgf/number format/fixed zerofill,
+                                /pgf/number format/precision=1},
+            y tick label style={/pgf/number format/fixed,
+                                /pgf/number format/fixed zerofill,
+                                /pgf/number format/precision=1},
+            grid = major,
+            width=16cm,
+            height=8cm,
+            grid style={dashed, gray!30},
+            xmin=-2,     % start the diagram at this x-coordinate
+            xmax= 2,     % end   the diagram at this x-coordinate
+            ymin=-1,     % start the diagram at this y-coordinate
+            ymax= 2,     % end   the diagram at this y-coordinate
+            xlabel=x,
+            ylabel=y,
+            tick align=outside,
+            enlargelimits=false]
+          \addplot[domain=-2:2, color1, ultra thick,samples=500] {1/(1+exp(-x))};
+          \addplot[domain=-2:2, color2, ultra thick,samples=500] {tanh(x)};
+          \addplot[domain=-2:2, color4, ultra thick,samples=500] {max(0, x)};
+          \addplot[domain=-2:2, color4, ultra thick,samples=500, dashed] {ln(exp(x) + 1)};
+          \addplot[domain=-2:2, color3, ultra thick,samples=500, dotted] {max(x, exp(x) - 1)};
+          \addlegendentry{$\varphi_1(x)=\frac{1}{1+e^{-x}}$}
+          \addlegendentry{$\varphi_2(x)=\tanh(x)$}
+          \addlegendentry{$\varphi_3(x)=\max(0, x)$}
+          \addlegendentry{$\varphi_4(x)=\log(e^x + 1)$}
+          \addlegendentry{$\varphi_5(x)=\max(x, e^x - 1)$}
+        \end{axis}
+    \end{tikzpicture}
+    \caption[Activation functions]{Activation functions plotted in $[-2, +2]$.
+             $\tanh$ and ELU are able to produce negative numbers. The image of
+             ELU, ReLU and Softplus is not bounded on the positive side, whereas
+             $\tanh$ and the logistic function are always below~1.}
+    \label{fig:activation-functions-plot}
+\end{figure}
+
+\glsreset{LReLU}
 \twocolumn
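As a quick cross-check of the derivative column in the overview table added above, the logistic entry can be verified by hand (a worked step for illustration, not part of the committed file):

\[
\varphi(x) = \frac{1}{1+e^{-x}}
\qquad\Rightarrow\qquad
\varphi'(x) = \frac{e^{-x}}{(1+e^{-x})^2}
            = \frac{e^x}{(e^x + 1)^2}
            = \varphi(x)\bigl(1-\varphi(x)\bigr),
\]

which matches the entry $\frac{e^x}{(e^x +1)^2}$ in the table.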

+ 33 - 15
publications/activation-functions/content.tex

@@ -1,24 +1,42 @@
 %!TEX root = main.tex
 \section{Introduction}
-TODO\cite{Thoma:2014}
-
-\section{Terminology}
-TODO
+Artificial neural networks have dozens of hyperparameters which influence
+their behaviour during training and evaluation time. One parameter is the
+choice of activation functions. While in principle every neuron could have a
+different activation function, in practice networks only use two activation
+functions: The softmax function for the output layer in order to obtain a
+probability distribution over the possible classes and one activation function
+for all other neurons.
 
 
+Activation functions should have the following properties:
+\begin{itemize}
+    \item \textbf{Non-linearity}: A linear activation function in a simple feed
+          forward network leads to a linear function. This means no matter how
+          many layers the network uses, there is an equivalent network with
+          only the input and the output layer. Please note that \glspl{CNN} are
+          different. Padding and pooling are also non-linear operations.
+    \item \textbf{Differentiability}: Activation functions need to be
+          differentiable in order to be able to apply gradient descent. It is
+          not necessary that they are differentiable at every point. In practice,
+          the gradient at non-differentiable points can simply be set to zero
+          in order to prevent weight updates at this point.
+    \item \textbf{Non-zero gradient}: The sign function is not suitable for
+          gradient descent based optimizers as its gradient is zero at all
+          differentiable points. An activation function should have infinitely
+          many points with non-zero gradient.
+\end{itemize}
 
 
-\section{Activation Functions}
-Nonlinear, differentiable activation functions are important for neural
-networks to allow them to learn nonlinear decision boundaries. One of the
-simplest and most widely used activation functions for \glspl{CNN} is
-\gls{ReLU}~\cite{AlexNet-2012}, but others such as
+One of the simplest and most widely used activation functions for \glspl{CNN}
+is \gls{ReLU}~\cite{AlexNet-2012}, but others such as
 \gls{ELU}~\cite{clevert2015fast}, \gls{PReLU}~\cite{he2015delving}, softplus~\cite{7280459}
-and softsign~\cite{bergstra2009quadratic} have been proposed. The baseline uses
-\gls{ELU}.
+and softsign~\cite{bergstra2009quadratic} have been proposed.
 
 
 Activation functions differ in the range of values and the derivative. The
 definitions and other comparisons of eleven activation functions are given
 in~\cref{table:activation-functions-overview}.
 
+
+\section{Important Differences of Proposed Activation Functions}
 Theoretical explanations why one activation function is preferable to another
 in some scenarios are the following:
 \begin{itemize}
@@ -96,6 +114,7 @@ in~\cref{table:HASYv2-accuracies-activation-functions}. For both datasets, the
 logistic function has a much shorter training time and a noticeably lower test
 accuracy.
 
+\glsunset{LReLU}
 \begin{table}[H]
     \centering
     \begin{tabular}{lccc}
@@ -111,7 +130,7 @@ accuracy.
     ReLU          & \cellcolor{yellow!25}Yes\footnotemark & \cellcolor{red!25} No & \cellcolor{yellow!25}Half-sided \\
     Softplus      & \cellcolor{green!25}No    & \cellcolor{red!25}   No      & \cellcolor{yellow!25}Half-sided \\
     S2ReLU        & \cellcolor{green!25}No    & \cellcolor{green!25}Yes      & \cellcolor{green!25} No \\
-    LReLU/PReLU   & \cellcolor{green!25}No    & \cellcolor{green!25}Yes      & \cellcolor{green!25} No \\
+    \gls{LReLU}/PReLU   & \cellcolor{green!25}No    & \cellcolor{green!25}Yes      & \cellcolor{green!25} No \\
     ELU           & \cellcolor{green!25}No    & \cellcolor{green!25}Yes      & \cellcolor{green!25} No \\
     \bottomrule
     \end{tabular}
@@ -120,8 +139,6 @@ accuracy.
 \end{table}
 \footnotetext{The dying ReLU problem is similar to the vanishing gradient problem.}
 
-\glsunset{LReLU}
-
 \begin{table}[H]
     \centering
     \begin{tabular}{lccclllll}
@@ -173,4 +190,5 @@ accuracy.
              functions on MNIST.}
     \label{table:MNIST-accuracies-activation-functions}
 \end{table}
-\glsreset{LReLU}
+\glsreset{LReLU}
+
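The non-linearity bullet added in the introduction above can be made concrete with a two-layer example (an illustrative sketch, not part of the committed file): with identity activations, two affine layers collapse into a single affine map,

\[
f(x) = W_2\,\bigl(W_1 x + b_1\bigr) + b_2 = (W_2 W_1)\,x + (W_2 b_1 + b_2),
\]

so without a non-linear $\varphi$ between the layers, additional depth adds no representational power.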

+ 10 - 1
publications/activation-functions/main.tex

@@ -7,7 +7,15 @@
 \usepackage{amsmath,amssymb}
 \usepackage[table]{xcolor}
 \usepackage[absolute,overlay]{textpos}
+\usepackage{pgfplots}
+\pgfplotsset{compat=1.13}
 \usepackage{tikz}
+\usetikzlibrary{arrows.meta}
+\usetikzlibrary{decorations.pathreplacing}
+\usetikzlibrary{positioning}
+\usetikzlibrary{decorations.text}
+\usetikzlibrary{decorations.pathmorphing}
+\usetikzlibrary{shapes.multipart, calc}
 \usepackage{csquotes}
 \usepackage[binary-units,group-separator={,}]{siunitx}
 \sisetup{per-mode=fraction,
@@ -59,7 +67,7 @@
 \usepackage{braket}         % needed for \Set
 \usepackage{algorithm,algpseudocode}
 
-\usepackage[xindy,toc,section=chapter,numberedsection=autolabel]{glossaries}
+\usepackage[xindy,toc,section=section]{glossaries}
 
 
 % Make document nicer
 \DeclareMathOperator*{\argmin}{arg\,min}
@@ -93,6 +101,7 @@
 \input{content}
 \bibliographystyle{IEEEtranSA}
 \bibliography{bibliography}
+\printglossaries%
 \input{appendix}
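The new preamble lines in main.tex exist to support the pgfplots figure added to appendix.tex. Below is a minimal sketch of what that plot needs to compile on its own; the extra \usetikzlibrary lines are assumed to serve other figures in the repository and are not required for this particular axis.

\documentclass{article}
\usepackage{pgfplots}          % loads tikz as well
\pgfplotsset{compat=1.13}      % same compat level as in the commit
\begin{document}
\begin{tikzpicture}
    \begin{axis}[xmin=-2, xmax=2, ymin=-1, ymax=2, grid=major]
        \addplot[domain=-2:2, samples=200, thick]         {max(0, x)};      % ReLU
        \addplot[domain=-2:2, samples=200, thick, dashed] {ln(exp(x) + 1)}; % softplus
    \end{axis}
\end{tikzpicture}
\end{document}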