@@ -37,9 +37,10 @@
\State $\pi(x) \gets \arg \min_a \{Q(x, a)\}$
\EndFor
\EndWhile
+ \State \Return $\pi$
\EndProcedure
\end{algorithmic}
- \caption{Policy Iteration}
+ \caption{Policy Iteration: Learning a policy $\pi\colon \mathcal{X} \rightarrow \mathcal{A}$}
\label{alg:policy-iteration}
\end{algorithm}
\end{preview}