9 лет назад · 4e5cdcde51
--- a/source-code/Pseudocode/sarsa/Makefile
+++ b/source-code/Pseudocode/sarsa/Makefile
@@ -0,0 +1,36 @@
 
																+# Base name of the document: the rules below read $(SOURCE).tex and derive
															
 
																+# every output file name from it.
															
 
																+SOURCE := sarsa
															
 
																+# Passed to `convert -delay` by the gif target.
															
 
																+DELAY := 80
															
 
																+# Passed to `convert -density` by the gif target.
															
 
																+DENSITY := 300
															
 
																+# Output width handed to inkscape (png) and rsvg-convert (svg) via -w.
															
 
																+WIDTH := 512
															
 
																+
															
 
																+# First rule in the file, hence the default goal: compile $(SOURCE).tex to
															
 
																+# PDF with two pdflatex passes (presumably so cross-references settle --
															
 
																+# confirm), then remove the auxiliary files via the clean target.
															
 
																+# Declared phony because no file named "make" is ever produced.
															
 
																+.PHONY: make
															
 
																+make:
															
 
																+	pdflatex $(SOURCE).tex -output-format=pdf
															
 
																+	pdflatex $(SOURCE).tex -output-format=pdf
															
 
																+	# $(MAKE) instead of a literal "make" so -n, -j and command-line variables propagate
															
 
																+	$(MAKE) clean
															
 
																+
															
 
																+# Delete auxiliary build files matching the patterns below; *.pdf is not
															
 
																+# among them. TARGET is not assigned anywhere in this Makefile, so it
															
 
																+# expands to nothing unless supplied on the command line or in the environment.
															
 
																+# Declared phony so that a stray file named "clean" cannot disable the rule.
															
 
																+.PHONY: clean
															
 
																+clean:
															
 
																+	rm -rf  $(TARGET) *.class *.html *.log *.aux *.data *.gnuplot
															
 
																+
															
 
																+# Crop $(SOURCE).pdf and convert the cropped file to $(SOURCE).gif, then
															
 
																+# clean up. This rule does not build the PDF itself; $(SOURCE).pdf must
															
 
																+# already exist when it runs.
															
 
																+.PHONY: gif
															
 
																+gif:
															
 
																+	pdfcrop $(SOURCE).pdf
															
 
																+	convert -verbose -delay $(DELAY) -loop 0 -density $(DENSITY) $(SOURCE)-crop.pdf $(SOURCE).gif
															
 
																+	# $(MAKE) instead of a literal "make" so -n, -j and command-line variables propagate
															
 
																+	$(MAKE) clean
															
 
																+
															
 
																+# Produce $(SOURCE).png: rebuild the PDF, regenerate the SVG through the
															
 
																+# svg target, then export the SVG with inkscape at width $(WIDTH).
															
 
																+# NOTE(review): --export-png looks like the pre-1.0 Inkscape option name;
															
 
																+# confirm against the installed Inkscape version.
															
 
																+.PHONY: png
															
 
																+png:
															
 
																+	$(MAKE)
															
 
																+	$(MAKE) svg
															
 
																+	inkscape $(SOURCE).svg -w $(WIDTH) --export-png=$(SOURCE).png
															
 
																+
															
 
																+# Convert $(SOURCE).pdf to result.gif with white made transparent, then
															
 
																+# clean up. The output name is fixed (result.gif) and $(SOURCE).pdf must
															
 
																+# already exist; this rule does not build it.
															
 
																+.PHONY: transparentGif
															
 
																+transparentGif:
															
 
																+	convert $(SOURCE).pdf -transparent white result.gif
															
 
																+	# $(MAKE) instead of a literal "make" so -n, -j and command-line variables propagate
															
 
																+	$(MAKE) clean
															
 
																+
															
 
																+# Produce $(SOURCE).svg: rebuild the PDF, convert it with pdf2svg, pass the
															
 
																+# result through inkscape (pdf2svg does not always create valid SVGs),
															
 
																+# resize it to width $(WIDTH) with rsvg-convert via the temporary file
															
 
																+# $(SOURCE)2.svg, and write the final plain SVG with inkscape again.
															
 
																+.PHONY: svg
															
 
																+svg:
															
 
																+	$(MAKE)
															
 
																+	pdf2svg $(SOURCE).pdf $(SOURCE).svg
															
 
																+	inkscape $(SOURCE).svg --export-plain-svg=$(SOURCE).svg
															
 
																+	rsvg-convert -a -w $(WIDTH) -f svg $(SOURCE).svg -o $(SOURCE)2.svg
															
 
																+	inkscape $(SOURCE)2.svg --export-plain-svg=$(SOURCE).svg
															
 
																+	rm $(SOURCE)2.svg
															
--- a/source-code/Pseudocode/sarsa/README.md
+++ b/source-code/Pseudocode/sarsa/README.md
@@ -0,0 +1,3 @@
 
																+Compiled example
															
 
																+----------------
															
 
																+![Example](sarsa.png)
															
--- a/source-code/Pseudocode/sarsa/sarsa.png
+++ b/source-code/Pseudocode/sarsa/sarsa.png
--- a/source-code/Pseudocode/sarsa/sarsa.tex
+++ b/source-code/Pseudocode/sarsa/sarsa.tex
@@ -0,0 +1,50 @@
 
																+\documentclass{article}
															
 
																+\usepackage[pdftex,active,tightpage]{preview}
															
 
																+\setlength\PreviewBorder{2mm}
															
 
																+
															
 
																+\usepackage[utf8]{inputenc} % this is needed for umlauts
															
 
																+\usepackage[ngerman]{babel} % this is needed for umlauts
															
 
																+\usepackage[T1]{fontenc}    % this is needed for correct output of umlauts in pdf
															
 
																+\usepackage{amssymb,amsmath,amsfonts} % nice math rendering
															
 
																+\usepackage{braket} % needed for \Set
															
 
																+\usepackage{caption}
															
 
																+\usepackage{algorithm}
															
 
																+\usepackage[noend]{algpseudocode}
															
 
																+
															
 
																+\DeclareCaptionFormat{myformat}{#3}
															
 
																+\captionsetup[algorithm]{format=myformat}
															
 
																+
															
 
																+\begin{document}
															
 
																+\begin{preview}
															
 
																+    \begin{algorithm}[H]
															
 
																+        \begin{algorithmic}
															
 
																+        \Require
															
 
																+        \Statex States $\mathcal{X} = \{1, \dots, n_x\}$
															
 
																+        \Statex Actions $\mathcal{A} = \{1, \dots, n_a\},\qquad A: \mathcal{X} \Rightarrow \mathcal{A}$
															
 
																+        \Statex Reward function $R: \mathcal{X} \times \mathcal{A} \rightarrow \mathbb{R}$
															
 
																+        \Statex Black-box (probabilistic) transition function $T: \mathcal{X} \times \mathcal{A} \rightarrow \mathcal{X}$
															
 
																+        \Statex Learning rate $\alpha \in [0, 1]$, typically $\alpha = 0.1$
															
 
																+        \Statex Discounting factor $\gamma \in [0, 1]$
															
 
																+        \Statex $\lambda \in [0, 1]$: Trade-off between TD and MC
															
 
																+        \Procedure{SARSA}{$\mathcal{X}$, $A$, $R$, $T$, $\alpha$, $\gamma$, $\lambda$}
															
 
																+            \State Initialize $Q: \mathcal{X} \times \mathcal{A} \rightarrow \mathbb{R}$ arbitrarily
															
 
																+            \While{$Q$ is not converged}
															
 
																+                \State Select $(s, a) \in \mathcal{X} \times \mathcal{A}$ arbitrarily
															
 
																+                \While{$s$ is not terminal}
															
 
																+                    \State $r \gets R(s, a)$ \Comment{Receive the reward}
															
 
																+                    \State $s' \gets T(s, a)$ \Comment{Receive the new state}
															
 
																+                    \State Calculate $\pi$ based on $Q$ (e.g. epsilon-greedy)
															
 
																+                    \State $a' \gets \pi(s')$
															
 
																+                    \State $Q(s, a) \gets (1 - \alpha ) \cdot Q(s, a) + \alpha \cdot (r + \gamma Q(s', a'))$
															
 
																+                    \State $s \gets s'$
															
 
																+                    \State $a \gets a'$
															
 
																+                \EndWhile
															
 
																+            \EndWhile
															
 
																+            \State \Return $Q$
															
 
																+        \EndProcedure
															
 
																+        \end{algorithmic}
															
 
																+    \caption{SARSA: Learn function $Q: \mathcal{X} \times \mathcal{A} \rightarrow \mathbb{R}$}
															
 
																+    \label{alg:sarsa}
															
 
																+    \end{algorithm}
															
 
																+\end{preview}
															
 
																+\end{document}