Submit changes before installing new OS

Martin Thoma 7 years ago
parent commit e03eaa277b

+ 17 - 17
documents/cv-curriculum-vitae/cv-curriculum-vitae.tex

@@ -164,22 +164,6 @@ and a big, but algorithmically not challenging project. To be honest,
 I only fixed some Java bugs.}\\
 
 %----------------------------------------------------------------------------------------
-% WORK EXPERIENCE -2-
-
-{\raggedleft\textsc{2011}\par}
-
-{\raggedright\large Student research assistant at \textsc{ Institute of Toxicology and Genetics}, KIT\\
-\textit{participating in a university research project}\\[5pt]}
-
-\normalsize{In summer 2011 I worked for over a month for a
-research project at KIT. I have written bash scripts for file
-conversions, fixed some bugs and re-written a slow Mathematica script
-in a much faster Python version. But it quickly turned out that
-this project had a lot of C++ source which was rarely commented or
-documented. I realized, that I wouldn't have time for this project
-after beginning my studies at university.}\\
-
-%----------------------------------------------------------------------------------------
 % WORK EXPERIENCE -4-
 
 %{\raggedleft\textsc{2010}\par}
@@ -208,7 +192,7 @@ after beginning my studies at university.}\\
 
 
 \colorbox{shade}{\textcolor{text1}{
 \begin{tabular}{c|p{7cm}}
-\raisebox{-4pt}{\textifsymbol{18}} & Parkstraße 17, 76131 Karlsruhe \\ % Address
+\raisebox{-4pt}{\textifsymbol{18}} & Alte Allee 107, 81245 Munich \\ % Address
 \raisebox{-3pt}{\Mobilefone} & +49 $($1636$)$ 28 04 91 \\ % Phone number
 \raisebox{-1pt}{\Letter} & \href{mailto:info@martin-thoma.de}{info@martin-thoma.de} \\ % Email address
 \Keyboard & \href{http://martin-thoma.com}{martin-thoma.com} \\ % Website
@@ -332,6 +316,22 @@ Good Knowledge          & \textsc{Python}\\ \\
 
 
 \section{Work Experience}
 %----------------------------------------------------------------------------------------
+% WORK EXPERIENCE -2-
+
+{\raggedleft\textsc{2011}\par}
+
+{\raggedright\large Student research assistant at \textsc{ Institute of Toxicology and Genetics}, KIT\\
+\textit{participating in a university research project}\\[5pt]}
+
+\normalsize{In summer 2011 I worked for over a month for a
+research project at KIT. I have written bash scripts for file
+conversions, fixed some bugs and re-written a slow Mathematica script
+in a much faster Python version. But it quickly turned out that
+this project had a lot of C++ source which was rarely commented or
+documented. I realized that I wouldn't have time for this project
+after beginning my studies at university.}\\
+
+%----------------------------------------------------------------------------------------
 % WORK EXPERIENCE -3-
 
 {\raggedleft\textsc{since 2011}\par}

BIN
documents/math-minimal-distance-to-cubic-function/math-minimal-distance-to-cubic-function.pdf


+ 4 - 3
publications/activation-functions/abstract.tex

@@ -1,7 +1,8 @@
 \begin{abstract}
 This paper reviews the most common activation functions for convolution neural
-networks. They are evaluated on TODO dataset and possible reasons for the
-differences in their performance are given.
+networks. They are evaluated on the Asirra, GTSRB, HASYv2, STL-10, CIFAR-10,
+CIFAR-100 and MNIST datasets. Possible reasons for the differences in their
+performance are given.
 
 
-New state of the art results are achieved for TODO.
+New state of the art results are achieved for Asirra, GTSRB, HASYv2 and STL-10.
 \end{abstract}

+ 99 - 16
publications/activation-functions/appendix.tex

@@ -7,17 +7,17 @@
     \centering
     \hspace*{-1cm}\begin{tabular}{lllll}
     \toprule
-    Name                     & Function $\varphi(x)$ & Range of Values & $\varphi'(x)$ \\\midrule % & Used by 
-    Sign function$^\dagger$  & $\begin{cases}+1 &\text{if } x \geq 0\\-1 &\text{if } x < 0\end{cases}$ & $\Set{-1,1}$                              & $0$                    \\%& \cite{971754} \\
-    \parbox[t]{2.6cm}{Heaviside\\step function$^\dagger$} & $\begin{cases}+1 &\text{if } x > 0\\0 &\text{if } x < 0\end{cases}$ & $\Set{0, 1}$  & $0$                       \\%& \cite{mcculloch1943logical}\\
-    Logistic function        & $\frac{1}{1+e^{-x}}$                           & $[0, 1]$                                                        & $\frac{e^x}{(e^x +1)^2}$  \\%& \cite{duch1999survey} \\
-    Tanh                     & $\frac{e^x - e^{-x}}{e^x + e^{-x}} = \tanh(x)$ & $[-1, 1]$                                                       & $\sech^2(x)$              \\%& \cite{LeNet-5,Thoma:2014}\\
-    \gls{ReLU}$^\dagger$           & $\max(0, x)$                                   & $[0, +\infty)$                                                  & $\begin{cases}1 &\text{if } x > 0\\0 &\text{if } x < 0\end{cases}$      \\%& \cite{AlexNet-2012}\\
-    \parbox[t]{2.6cm}{\gls{LReLU}$^\dagger$\footnotemark\\(\gls{PReLU})} & $\varphi(x) = \max(\alpha x, x)$                        & $(-\infty, +\infty)$                                             & $\begin{cases}1 &\text{if } x > 0\\\alpha &\text{if } x < 0\end{cases}$ \\%& \cite{maas2013rectifier,he2015delving} \\
-    Softplus                 & $\log(e^x + 1)$                                & $(0, +\infty)$                                       & $\frac{e^x}{e^x + 1}$    \\%& \cite{dugas2001incorporating,glorot2011deep} \\
-    \gls{ELU}                & $\begin{cases}x &\text{if } x > 0\\\alpha (e^x - 1) &\text{if } x \leq 0\end{cases}$ & $(-\infty, +\infty)$ & $\begin{cases}1 &\text{if } x > 0\\\alpha e^x &\text{otherwise}\end{cases}$ \\%& \cite{clevert2015fast} \\
-    Softmax$^\ddagger$       & $o(\mathbf{x})_j = \frac{e^{x_j}}{\sum_{k=1}^K e^{x_k}}$    & $[0, 1]^K$                                           & $o(\mathbf{x})_j \cdot \frac{\sum_{k=1}^K e^{x_k} - e^{x_j}}{\sum_{k=1}^K e^{x_k}}$          \\%& \cite{AlexNet-2012,Thoma:2014}\\
-    Maxout$^\ddagger$        & $o(\mathbf{x}) = \max_{x \in \mathbf{x}} x$                 & $(-\infty, +\infty)$                                 & $\begin{cases}1 &\text{if } x_i = \max \mathbf{x}\\0 &\text{otherwise}\end{cases}$          \\%& \cite{goodfellow2013maxout}       \\
+    Name                     & Function $\varphi(x)$ & Range of Values & $\varphi'(x)$ & Used by \\\midrule % 
+    Sign function$^\dagger$  & $\begin{cases}+1 &\text{if } x \geq 0\\-1 &\text{if } x < 0\end{cases}$ & $\Set{-1,1}$                              & $0$                    & \cite{971754} \\
+    \parbox[t]{2.6cm}{Heaviside\\step function$^\dagger$} & $\begin{cases}+1 &\text{if } x > 0\\0 &\text{if } x < 0\end{cases}$ & $\Set{0, 1}$  & $0$                       & \cite{mcculloch1943logical}\\
+    Logistic function        & $\frac{1}{1+e^{-x}}$                           & $[0, 1]$                                                        & $\frac{e^x}{(e^x +1)^2}$  & \cite{duch1999survey} \\
+    Tanh                     & $\frac{e^x - e^{-x}}{e^x + e^{-x}} = \tanh(x)$ & $[-1, 1]$                                                       & $\sech^2(x)$              & \cite{LeNet-5,Thoma:2014}\\
+    \gls{ReLU}$^\dagger$           & $\max(0, x)$                                   & $[0, +\infty)$                                                  & $\begin{cases}1 &\text{if } x > 0\\0 &\text{if } x < 0\end{cases}$      & \cite{AlexNet-2012}\\
+    \parbox[t]{2.6cm}{\gls{LReLU}$^\dagger$\footnotemark\\(\gls{PReLU})} & $\varphi(x) = \max(\alpha x, x)$                        & $(-\infty, +\infty)$                                             & $\begin{cases}1 &\text{if } x > 0\\\alpha &\text{if } x < 0\end{cases}$ & \cite{maas2013rectifier,he2015delving} \\
+    Softplus                 & $\log(e^x + 1)$                                & $(0, +\infty)$                                       & $\frac{e^x}{e^x + 1}$    & \cite{dugas2001incorporating,glorot2011deep} \\
+    \gls{ELU}                & $\begin{cases}x &\text{if } x > 0\\\alpha (e^x - 1) &\text{if } x \leq 0\end{cases}$ & $(-\infty, +\infty)$ & $\begin{cases}1 &\text{if } x > 0\\\alpha e^x &\text{otherwise}\end{cases}$ & \cite{clevert2015fast} \\
+    Softmax$^\ddagger$       & $o(\mathbf{x})_j = \frac{e^{x_j}}{\sum_{k=1}^K e^{x_k}}$    & $[0, 1]^K$                                           & $o(\mathbf{x})_j \cdot \frac{\sum_{k=1}^K e^{x_k} - e^{x_j}}{\sum_{k=1}^K e^{x_k}}$         & \cite{AlexNet-2012,Thoma:2014}\\
+    Maxout$^\ddagger$        & $o(\mathbf{x}) = \max_{x \in \mathbf{x}} x$                 & $(-\infty, +\infty)$                                 & $\begin{cases}1 &\text{if } x_i = \max \mathbf{x}\\0 &\text{otherwise}\end{cases}$          & \cite{goodfellow2013maxout}       \\
     \bottomrule
     \end{tabular}
     \caption[Activation functions]{Overview of activation functions. Functions
@@ -63,13 +63,11 @@
     \end{tabular}
     \caption[Activation function evaluation results on CIFAR-100]{Training and
              test accuracy of adjusted baseline models trained with different
-             activation functions on CIFAR-100. For LReLU, $\alpha = 0.3$ was
+             activation functions on CIFAR-100. For \gls{LReLU}, $\alpha = 0.3$ was
              chosen.}
     \label{table:CIFAR-100-accuracies-activation-functions}
 \end{table}
 
-\glsreset{LReLU}
-
 \begin{table}[H]
     \centering
     \setlength\tabcolsep{1.5pt}
@@ -91,7 +89,7 @@
     \end{tabular}
     \caption[Activation function evaluation results on HASYv2]{Test accuracy of
              adjusted baseline models trained with different activation
-             functions on HASYv2. For LReLU, $\alpha = 0.3$ was chosen.}
+             functions on HASYv2. For \gls{LReLU}, $\alpha = 0.3$ was chosen.}
     \label{table:HASYv2-accuracies-activation-functions}
 \end{table}
 
@@ -116,8 +114,93 @@
     \end{tabular}
     \caption[Activation function evaluation results on STL-10]{Test accuracy of
              adjusted baseline models trained with different activation
-             functions on STL-10. For LReLU, $\alpha = 0.3$ was chosen.}
+             functions on STL-10. For \gls{LReLU}, $\alpha = 0.3$ was chosen.}
     \label{table:STL-10-accuracies-activation-functions}
 \end{table}
 
+\begin{table}[H]
+    \centering
+    \hspace*{-1cm}\begin{tabular}{lllll}
+    \toprule
+    Name                     & Function $\varphi(x)$ & Range of Values & $\varphi'(x)$ \\\midrule % & Used by 
+    Sign function$^\dagger$  & $\begin{cases}+1 &\text{if } x \geq 0\\-1 &\text{if } x < 0\end{cases}$ & $\Set{-1,1}$                              & $0$                    \\%& \cite{971754} \\
+    \parbox[t]{2.6cm}{Heaviside\\step function$^\dagger$} & $\begin{cases}+1 &\text{if } x > 0\\0 &\text{if } x < 0\end{cases}$ & $\Set{0, 1}$  & $0$                       \\%& \cite{mcculloch1943logical}\\
+    Logistic function        & $\frac{1}{1+e^{-x}}$                           & $[0, 1]$                                                        & $\frac{e^x}{(e^x +1)^2}$  \\%& \cite{duch1999survey} \\
+    Tanh                     & $\frac{e^x - e^{-x}}{e^x + e^{-x}} = \tanh(x)$ & $[-1, 1]$                                                       & $\sech^2(x)$              \\%& \cite{LeNet-5,Thoma:2014}\\
+    \gls{ReLU}$^\dagger$           & $\max(0, x)$                                   & $[0, +\infty)$                                                  & $\begin{cases}1 &\text{if } x > 0\\0 &\text{if } x < 0\end{cases}$      \\%& \cite{AlexNet-2012}\\
+    \parbox[t]{2.6cm}{\gls{LReLU}$^\dagger$\footnotemark\\(\gls{PReLU})} & $\varphi(x) = \max(\alpha x, x)$                        & $(-\infty, +\infty)$                                             & $\begin{cases}1 &\text{if } x > 0\\\alpha &\text{if } x < 0\end{cases}$ \\%& \cite{maas2013rectifier,he2015delving} \\
+    Softplus                 & $\log(e^x + 1)$                                & $(0, +\infty)$                                       & $\frac{e^x}{e^x + 1}$    \\%& \cite{dugas2001incorporating,glorot2011deep} \\
+    \gls{ELU}                & $\begin{cases}x &\text{if } x > 0\\\alpha (e^x - 1) &\text{if } x \leq 0\end{cases}$ & $(-\infty, +\infty)$ & $\begin{cases}1 &\text{if } x > 0\\\alpha e^x &\text{otherwise}\end{cases}$ \\%& \cite{clevert2015fast} \\
+    Softmax$^\ddagger$       & $o(\mathbf{x})_j = \frac{e^{x_j}}{\sum_{k=1}^K e^{x_k}}$    & $[0, 1]^K$                                           & $o(\mathbf{x})_j \cdot \frac{\sum_{k=1}^K e^{x_k} - e^{x_j}}{\sum_{k=1}^K e^{x_k}}$          \\%& \cite{AlexNet-2012,Thoma:2014}\\
+    Maxout$^\ddagger$        & $o(\mathbf{x}) = \max_{x \in \mathbf{x}} x$                 & $(-\infty, +\infty)$                                 & $\begin{cases}1 &\text{if } x_i = \max \mathbf{x}\\0 &\text{otherwise}\end{cases}$          \\%& \cite{goodfellow2013maxout}       \\
+    \bottomrule
+    \end{tabular}
+    \caption[Activation functions]{Overview of activation functions. Functions
+             marked with $\dagger$ are not differentiable at 0 and functions
+             marked with $\ddagger$ operate on all elements of a layer
+             simultaneously. The hyperparameters $\alpha \in (0, 1)$ of Leaky
+             ReLU and ELU are typically $\alpha = 0.01$. Other activation
+             functions like randomized leaky ReLUs exist~\cite{xu2015empirical},
+             but are far less commonly used.\\
+             Some functions are smoothed versions of others, like the logistic
+             function for the Heaviside step function, tanh for the sign
+             function, softplus for ReLU.\\
+             Softmax is the standard activation function for the last layer of
+             a classification network as it produces a probability
+             distribution. See \Cref{fig:activation-functions-plot} for a plot
+             of some of them.}
+    \label{table:activation-functions-overview}
+\end{table}
+\footnotetext{$\alpha$ is a hyperparameter in leaky ReLU, but a learnable parameter in the parametric ReLU function.}
+
+\begin{figure}[ht]
+    \centering
+    \begin{tikzpicture}
+        \definecolor{color1}{HTML}{E66101}
+        \definecolor{color2}{HTML}{FDB863}
+        \definecolor{color3}{HTML}{B2ABD2}
+        \definecolor{color4}{HTML}{5E3C99}
+        \begin{axis}[
+            legend pos=north west,
+            legend cell align={left},
+            axis x line=middle,
+            axis y line=middle,
+            x tick label style={/pgf/number format/fixed,
+                                /pgf/number format/fixed zerofill,
+                                /pgf/number format/precision=1},
+            y tick label style={/pgf/number format/fixed,
+                                /pgf/number format/fixed zerofill,
+                                /pgf/number format/precision=1},
+            grid = major,
+            width=16cm,
+            height=8cm,
+            grid style={dashed, gray!30},
+            xmin=-2,     % start the diagram at this x-coordinate
+            xmax= 2,     % end   the diagram at this x-coordinate
+            ymin=-1,     % start the diagram at this y-coordinate
+            ymax= 2,     % end   the diagram at this y-coordinate
+            xlabel=x,
+            ylabel=y,
+            tick align=outside,
+            enlargelimits=false]
+          \addplot[domain=-2:2, color1, ultra thick,samples=500] {1/(1+exp(-x))};
+          \addplot[domain=-2:2, color2, ultra thick,samples=500] {tanh(x)};
+          \addplot[domain=-2:2, color4, ultra thick,samples=500] {max(0, x)};
+          \addplot[domain=-2:2, color4, ultra thick,samples=500, dashed] {ln(exp(x) + 1)};
+          \addplot[domain=-2:2, color3, ultra thick,samples=500, dotted] {max(x, exp(x) - 1)};
+          \addlegendentry{$\varphi_1(x)=\frac{1}{1+e^{-x}}$}
+          \addlegendentry{$\varphi_2(x)=\tanh(x)$}
+          \addlegendentry{$\varphi_3(x)=\max(0, x)$}
+          \addlegendentry{$\varphi_4(x)=\log(e^x + 1)$}
+          \addlegendentry{$\varphi_5(x)=\max(x, e^x - 1)$}
+        \end{axis}
+    \end{tikzpicture}
+    \caption[Activation functions]{Activation functions plotted in $[-2, +2]$.
+             $\tanh$ and ELU are able to produce negative numbers. The image of
+             ELU, ReLU and Softplus is not bounded on the positive side, whereas
+             $\tanh$ and the logistic function are always below~1.}
+    \label{fig:activation-functions-plot}
+\end{figure}
+
+\glsreset{LReLU}
 \twocolumn
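As a quick cross-check of the derivative column in the overview table added above, the logistic entry can be verified by hand (a worked step for illustration, not part of the committed file):

\[
\varphi(x) = \frac{1}{1+e^{-x}}
\qquad\Rightarrow\qquad
\varphi'(x) = \frac{e^{-x}}{(1+e^{-x})^2}
            = \frac{e^x}{(e^x + 1)^2}
            = \varphi(x)\bigl(1-\varphi(x)\bigr),
\]

which matches the entry $\frac{e^x}{(e^x +1)^2}$ in the table.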

+ 33 - 15
publications/activation-functions/content.tex

@@ -1,24 +1,42 @@
 %!TEX root = main.tex
 \section{Introduction}
-TODO\cite{Thoma:2014}
-
-\section{Terminology}
-TODO
+Artificial neural networks have dozens of hyperparameters which influence
+their behaviour during training and evaluation time. One parameter is the
+choice of activation functions. While in principle every neuron could have a
+different activation function, in practice networks only use two activation
+functions: The softmax function for the output layer in order to obtain a
+probability distribution over the possible classes and one activation function
+for all other neurons.
 
 
+Activation functions should have the following properties:
+\begin{itemize}
+    \item \textbf{Non-linearity}: A linear activation function in a simple feed
+          forward network leads to a linear function. This means no matter how
+          many layers the network uses, there is an equivalent network with
+          only the input and the output layer. Please note that \glspl{CNN} are
+          different. Padding and pooling are also non-linear operations.
+    \item \textbf{Differentiability}: Activation functions need to be
+          differentiable in order to be able to apply gradient descent. It is
+          not necessary that they are differentiable at every point. In practice,
+          the gradient at non-differentiable points can simply be set to zero
+          in order to prevent weight updates at this point.
+    \item \textbf{Non-zero gradient}: The sign function is not suitable for
+          gradient descent based optimizers as its gradient is zero at all
+          differentiable points. An activation function should have infinitely
+          many points with non-zero gradient.
+\end{itemize}
 
 
-\section{Activation Functions}
-Nonlinear, differentiable activation functions are important for neural
-networks to allow them to learn nonlinear decision boundaries. One of the
-simplest and most widely used activation functions for \glspl{CNN} is
-\gls{ReLU}~\cite{AlexNet-2012}, but others such as
+One of the simplest and most widely used activation functions for \glspl{CNN}
+is \gls{ReLU}~\cite{AlexNet-2012}, but others such as
 \gls{ELU}~\cite{clevert2015fast}, \gls{PReLU}~\cite{he2015delving}, softplus~\cite{7280459}
-and softsign~\cite{bergstra2009quadratic} have been proposed. The baseline uses
-\gls{ELU}.
+and softsign~\cite{bergstra2009quadratic} have been proposed.
 
 
 Activation functions differ in the range of values and the derivative. The
 definitions and other comparisons of eleven activation functions are given
 in~\cref{table:activation-functions-overview}.
 
+
+\section{Important Differences of Proposed Activation Functions}
 Theoretical explanations why one activation function is preferable to another
 in some scenarios are the following:
 \begin{itemize}
@@ -96,6 +114,7 @@ in~\cref{table:HASYv2-accuracies-activation-functions}. For both datasets, the
 logistic function has a much shorter training time and a noticeably lower test
 accuracy.
 
+\glsunset{LReLU}
 \begin{table}[H]
     \centering
     \begin{tabular}{lccc}
@@ -111,7 +130,7 @@ accuracy.
     ReLU          & \cellcolor{yellow!25}Yes\footnotemark & \cellcolor{red!25} No & \cellcolor{yellow!25}Half-sided \\
     Softplus      & \cellcolor{green!25}No    & \cellcolor{red!25}   No      & \cellcolor{yellow!25}Half-sided \\
     S2ReLU        & \cellcolor{green!25}No    & \cellcolor{green!25}Yes      & \cellcolor{green!25} No \\
-    LReLU/PReLU   & \cellcolor{green!25}No    & \cellcolor{green!25}Yes      & \cellcolor{green!25} No \\
+    \gls{LReLU}/PReLU   & \cellcolor{green!25}No    & \cellcolor{green!25}Yes      & \cellcolor{green!25} No \\
     ELU           & \cellcolor{green!25}No    & \cellcolor{green!25}Yes      & \cellcolor{green!25} No \\
     \bottomrule
     \end{tabular}
@@ -120,8 +139,6 @@ accuracy.
 \end{table}
 \footnotetext{The dying ReLU problem is similar to the vanishing gradient problem.}
 
-\glsunset{LReLU}
-
 \begin{table}[H]
     \centering
     \begin{tabular}{lccclllll}
@@ -173,4 +190,5 @@ accuracy.
              functions on MNIST.}
     \label{table:MNIST-accuracies-activation-functions}
 \end{table}
-\glsreset{LReLU}
+\glsreset{LReLU}
+
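The non-linearity bullet added in the introduction above can be made concrete with a two-layer example (an illustrative sketch, not part of the committed file): with identity activations, two affine layers collapse into a single affine map,

\[
f(x) = W_2\,\bigl(W_1 x + b_1\bigr) + b_2 = (W_2 W_1)\,x + (W_2 b_1 + b_2),
\]

so without a non-linear $\varphi$ between the layers, additional depth adds no representational power.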

+ 10 - 1
publications/activation-functions/main.tex

@@ -7,7 +7,15 @@
 \usepackage{amsmath,amssymb}
 \usepackage[table]{xcolor}
 \usepackage[absolute,overlay]{textpos}
+\usepackage{pgfplots}
+\pgfplotsset{compat=1.13}
 \usepackage{tikz}
+\usetikzlibrary{arrows.meta}
+\usetikzlibrary{decorations.pathreplacing}
+\usetikzlibrary{positioning}
+\usetikzlibrary{decorations.text}
+\usetikzlibrary{decorations.pathmorphing}
+\usetikzlibrary{shapes.multipart, calc}
 \usepackage{csquotes}
 \usepackage[binary-units,group-separator={,}]{siunitx}
 \sisetup{per-mode=fraction,
@@ -59,7 +67,7 @@
 \usepackage{braket}         % needed for \Set
 \usepackage{algorithm,algpseudocode}
 
-\usepackage[xindy,toc,section=chapter,numberedsection=autolabel]{glossaries}
+\usepackage[xindy,toc,section=section]{glossaries}
 
 
 % Make document nicer
 \DeclareMathOperator*{\argmin}{arg\,min}
@@ -93,6 +101,7 @@
 \input{content}
 \bibliographystyle{IEEEtranSA}
 \bibliography{bibliography}
+\printglossaries%
 \input{appendix}
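The new preamble lines in main.tex exist to support the pgfplots figure added to appendix.tex. Below is a minimal sketch of what that plot needs to compile on its own; the extra \usetikzlibrary lines are assumed to serve other figures in the repository and are not required for this particular axis.

\documentclass{article}
\usepackage{pgfplots}          % loads tikz as well
\pgfplotsset{compat=1.13}      % same compat level as in the commit
\begin{document}
\begin{tikzpicture}
    \begin{axis}[xmin=-2, xmax=2, ymin=-1, ymax=2, grid=major]
        \addplot[domain=-2:2, samples=200, thick]         {max(0, x)};      % ReLU
        \addplot[domain=-2:2, samples=200, thick, dashed] {ln(exp(x) + 1)}; % softplus
    \end{axis}
\end{tikzpicture}
\end{document}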