ssm_intro.py 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249
  1. #!/usr/bin/env python
  2. # coding: utf-8
  3. # ```{math}
  4. #
  5. # \newcommand\floor[1]{\lfloor#1\rfloor}
  6. #
  7. # \newcommand{\real}{\mathbb{R}}
  8. #
  9. # % Numbers
  10. # \newcommand{\vzero}{\boldsymbol{0}}
  11. # \newcommand{\vone}{\boldsymbol{1}}
  12. #
  13. # % Greek https://www.latex-tutorial.com/symbols/greek-alphabet/
  14. # \newcommand{\valpha}{\boldsymbol{\alpha}}
  15. # \newcommand{\vbeta}{\boldsymbol{\beta}}
  16. # \newcommand{\vchi}{\boldsymbol{\chi}}
  17. # \newcommand{\vdelta}{\boldsymbol{\delta}}
  18. # \newcommand{\vDelta}{\boldsymbol{\Delta}}
  19. # \newcommand{\vepsilon}{\boldsymbol{\epsilon}}
  20. # \newcommand{\vzeta}{\boldsymbol{\zeta}}
  21. # \newcommand{\vXi}{\boldsymbol{\Xi}}
  22. # \newcommand{\vell}{\boldsymbol{\ell}}
  23. # \newcommand{\veta}{\boldsymbol{\eta}}
  24. # %\newcommand{\vEta}{\boldsymbol{\Eta}}
  25. # \newcommand{\vgamma}{\boldsymbol{\gamma}}
  26. # \newcommand{\vGamma}{\boldsymbol{\Gamma}}
  27. # \newcommand{\vmu}{\boldsymbol{\mu}}
  28. # \newcommand{\vmut}{\boldsymbol{\tilde{\mu}}}
  29. # \newcommand{\vnu}{\boldsymbol{\nu}}
  30. # \newcommand{\vkappa}{\boldsymbol{\kappa}}
  31. # \newcommand{\vlambda}{\boldsymbol{\lambda}}
  32. # \newcommand{\vLambda}{\boldsymbol{\Lambda}}
  33. # \newcommand{\vLambdaBar}{\overline{\vLambda}}
  34. # %\newcommand{\vnu}{\boldsymbol{\nu}}
  35. # \newcommand{\vomega}{\boldsymbol{\omega}}
  36. # \newcommand{\vOmega}{\boldsymbol{\Omega}}
  37. # \newcommand{\vphi}{\boldsymbol{\phi}}
  38. # \newcommand{\vvarphi}{\boldsymbol{\varphi}}
  39. # \newcommand{\vPhi}{\boldsymbol{\Phi}}
  40. # \newcommand{\vpi}{\boldsymbol{\pi}}
  41. # \newcommand{\vPi}{\boldsymbol{\Pi}}
  42. # \newcommand{\vpsi}{\boldsymbol{\psi}}
  43. # \newcommand{\vPsi}{\boldsymbol{\Psi}}
  44. # \newcommand{\vrho}{\boldsymbol{\rho}}
  45. # \newcommand{\vtheta}{\boldsymbol{\theta}}
  46. # \newcommand{\vthetat}{\boldsymbol{\tilde{\theta}}}
  47. # \newcommand{\vTheta}{\boldsymbol{\Theta}}
  48. # \newcommand{\vsigma}{\boldsymbol{\sigma}}
  49. # \newcommand{\vSigma}{\boldsymbol{\Sigma}}
  50. # \newcommand{\vSigmat}{\boldsymbol{\tilde{\Sigma}}}
  51. # \newcommand{\vsigmoid}{\vsigma}
  52. # \newcommand{\vtau}{\boldsymbol{\tau}}
  53. # \newcommand{\vxi}{\boldsymbol{\xi}}
  54. #
  55. #
  56. # % Lower Roman (Vectors)
  57. # \newcommand{\va}{\mathbf{a}}
  58. # \newcommand{\vb}{\mathbf{b}}
  59. # \newcommand{\vBt}{\mathbf{\tilde{B}}}
  60. # \newcommand{\vc}{\mathbf{c}}
  61. # \newcommand{\vct}{\mathbf{\tilde{c}}}
  62. # \newcommand{\vd}{\mathbf{d}}
  63. # \newcommand{\ve}{\mathbf{e}}
  64. # \newcommand{\vf}{\mathbf{f}}
  65. # \newcommand{\vg}{\mathbf{g}}
  66. # \newcommand{\vh}{\mathbf{h}}
  67. # %\newcommand{\myvh}{\mathbf{h}}
  68. # \newcommand{\vi}{\mathbf{i}}
  69. # \newcommand{\vj}{\mathbf{j}}
  70. # \newcommand{\vk}{\mathbf{k}}
  71. # \newcommand{\vl}{\mathbf{l}}
  72. # \newcommand{\vm}{\mathbf{m}}
  73. # \newcommand{\vn}{\mathbf{n}}
  74. # \newcommand{\vo}{\mathbf{o}}
  75. # \newcommand{\vp}{\mathbf{p}}
  76. # \newcommand{\vq}{\mathbf{q}}
  77. # \newcommand{\vr}{\mathbf{r}}
  78. # \newcommand{\vs}{\mathbf{s}}
  79. # \newcommand{\vt}{\mathbf{t}}
  80. # \newcommand{\vu}{\mathbf{u}}
  81. # \newcommand{\vv}{\mathbf{v}}
  82. # \newcommand{\vw}{\mathbf{w}}
  83. # \newcommand{\vws}{\vw_s}
  84. # \newcommand{\vwt}{\mathbf{\tilde{w}}}
  85. # \newcommand{\vWt}{\mathbf{\tilde{W}}}
  86. # \newcommand{\vwh}{\hat{\vw}}
  87. # \newcommand{\vx}{\mathbf{x}}
  88. # %\newcommand{\vx}{\mathbf{x}}
  89. # \newcommand{\vxt}{\mathbf{\tilde{x}}}
  90. # \newcommand{\vy}{\mathbf{y}}
  91. # \newcommand{\vyt}{\mathbf{\tilde{y}}}
  92. # \newcommand{\vz}{\mathbf{z}}
  93. # %\newcommand{\vzt}{\mathbf{\tilde{z}}}
  94. #
  95. #
  96. # % Upper Roman (Matrices)
  97. # \newcommand{\vA}{\mathbf{A}}
  98. # \newcommand{\vB}{\mathbf{B}}
  99. # \newcommand{\vC}{\mathbf{C}}
  100. # \newcommand{\vD}{\mathbf{D}}
  101. # \newcommand{\vE}{\mathbf{E}}
  102. # \newcommand{\vF}{\mathbf{F}}
  103. # \newcommand{\vG}{\mathbf{G}}
  104. # \newcommand{\vH}{\mathbf{H}}
  105. # \newcommand{\vI}{\mathbf{I}}
  106. # \newcommand{\vJ}{\mathbf{J}}
  107. # \newcommand{\vK}{\mathbf{K}}
  108. # \newcommand{\vL}{\mathbf{L}}
  109. # \newcommand{\vM}{\mathbf{M}}
  110. # \newcommand{\vMt}{\mathbf{\tilde{M}}}
  111. # \newcommand{\vN}{\mathbf{N}}
  112. # \newcommand{\vO}{\mathbf{O}}
  113. # \newcommand{\vP}{\mathbf{P}}
  114. # \newcommand{\vQ}{\mathbf{Q}}
  115. # \newcommand{\vR}{\mathbf{R}}
  116. # \newcommand{\vS}{\mathbf{S}}
  117. # \newcommand{\vT}{\mathbf{T}}
  118. # \newcommand{\vU}{\mathbf{U}}
  119. # \newcommand{\vV}{\mathbf{V}}
  120. # \newcommand{\vW}{\mathbf{W}}
  121. # \newcommand{\vX}{\mathbf{X}}
  122. # %\newcommand{\vXs}{\vX_{\vs}}
  123. # \newcommand{\vXs}{\vX_{s}}
  124. # \newcommand{\vXt}{\mathbf{\tilde{X}}}
  125. # \newcommand{\vY}{\mathbf{Y}}
  126. # \newcommand{\vZ}{\mathbf{Z}}
  127. # \newcommand{\vZt}{\mathbf{\tilde{Z}}}
  128. # \newcommand{\vzt}{\mathbf{\tilde{z}}}
  129. #
  130. #
  131. # %%%%
  132. # \newcommand{\hidden}{\vz}
  133. # \newcommand{\hid}{\hidden}
  134. # \newcommand{\observed}{\vy}
  135. # \newcommand{\obs}{\observed}
  136. # \newcommand{\inputs}{\vu}
  137. # \newcommand{\input}{\inputs}
  138. #
  139. # \newcommand{\hmmTrans}{\vA}
  140. # \newcommand{\hmmObs}{\vB}
  141. # \newcommand{\hmmInit}{\vpi}
  142. # \newcommand{\hmmhid}{\hidden}
  143. # \newcommand{\hmmobs}{\obs}
  144. #
  145. # \newcommand{\ldsDyn}{\vA}
  146. # \newcommand{\ldsObs}{\vC}
  147. # \newcommand{\ldsDynIn}{\vB}
  148. # \newcommand{\ldsObsIn}{\vD}
  149. # \newcommand{\ldsDynNoise}{\vQ}
  150. # \newcommand{\ldsObsNoise}{\vR}
  151. #
  152. # \newcommand{\ssmDynFn}{f}
  153. # \newcommand{\ssmObsFn}{h}
  154. #
  155. #
  156. # %%%
  157. # \newcommand{\gauss}{\mathcal{N}}
  158. #
  159. # \newcommand{\diag}{\mathrm{diag}}
  160. # ```
  161. #
  162. #
  163. # (sec:ssm-intro)=
  164. # # What are State Space Models?
  165. #
  166. #
  167. # A state space model or SSM
  168. # is a partially observed Markov model,
  169. # in which the hidden state, $\hidden_t$,
  170. # evolves over time according to a Markov process,
  171. # possibly conditional on external inputs or controls $\input_t$,
  172. # and each hidden state generates some
  173. # observations $\obs_t$ at each time step.
  174. # (In this book, we mostly focus on discrete-time systems,
  175. # although we consider the continuous-time case in XXX.)
  176. # We get to see the observations, but not the hidden state.
  177. # Our main goal is to infer the hidden state given the observations.
  178. # However, we can also use the model to predict future observations,
  179. # by first predicting future hidden states, and then predicting
  180. # what observations they might generate.
  181. # By using a hidden state $\hidden_t$
  182. # to represent the past observations, $\obs_{1:t-1}$,
  183. # the model can have "infinite" memory,
  184. # unlike a standard Markov model.
  185. #
  186. # ```{figure} /figures/SSM-AR-inputs.png
  187. # :height: 300px
  188. # :name: fig:ssm-ar
  189. #
  190. # Illustration of an SSM as a graphical model.
  191. # ```
  192. #
  193. #
  194. # Formally, we can define an SSM
  195. # as the following joint distribution:
  196. # ```{math}
  197. # :label: eq:SSM-ar
  198. # p(\hmmobs_{1:T},\hmmhid_{1:T}|\inputs_{1:T})
  199. # = \left[ p(\hmmhid_1|\inputs_1) \prod_{t=2}^{T}
  200. # p(\hmmhid_t|\hmmhid_{t-1},\inputs_t) \right]
  201. # \left[ \prod_{t=1}^T p(\hmmobs_t|\hmmhid_t, \inputs_t, \hmmobs_{t-1}) \right]
  202. # ```
  203. # where $p(\hmmhid_t|\hmmhid_{t-1},\inputs_t)$ is the
  204. # transition model,
  205. # $p(\hmmobs_t|\hmmhid_t, \inputs_t, \hmmobs_{t-1})$ is the
  206. # observation model,
  207. # and $\inputs_{t}$ is an optional input or action.
  208. # See {numref}`fig:ssm-ar`
  209. # for an illustration of the corresponding graphical model.
  210. #
  211. #
  212. # We often consider a simpler setting in which the
  213. # observations are conditionally independent of each other
  214. # (rather than having Markovian dependencies) given the hidden state.
  215. # In this case, the joint distribution simplifies to
  216. # ```{math}
  217. # :label: eq:SSM-input
  218. # p(\hmmobs_{1:T},\hmmhid_{1:T}|\inputs_{1:T})
  219. # = \left[ p(\hmmhid_1|\inputs_1) \prod_{t=2}^{T}
  220. # p(\hmmhid_t|\hmmhid_{t-1},\inputs_t) \right]
  221. # \left[ \prod_{t=1}^T p(\hmmobs_t|\hmmhid_t, \inputs_t) \right]
  222. # ```
  223. # Sometimes there are no external inputs, so the model further
  224. # simplifies to the following unconditional generative model:
  225. # ```{math}
  226. # :label: eq:SSM-no-input
  227. # p(\hmmobs_{1:T},\hmmhid_{1:T})
  228. # = \left[ p(\hmmhid_1) \prod_{t=2}^{T}
  229. # p(\hmmhid_t|\hmmhid_{t-1}) \right]
  230. # \left[ \prod_{t=1}^T p(\hmmobs_t|\hmmhid_t) \right]
  231. # ```
  232. # See {numref}`ssm-simplified`
  233. # for an illustration of the corresponding graphical model.
  234. #
  235. #
  236. # ```{figure} /figures/SSM-simplified.png
  237. # :scale: 100%
  238. # :name: ssm-simplified
  239. #
  240. # Illustration of a simplified SSM.
  241. # ```
  242. #
  243. #
  244. #