|
@@ -233,8 +233,14 @@ def print_source(fname):
|
|
|
# \newcommand{\ldsDynNoise}{\vQ}
|
|
|
# \newcommand{\ldsObsNoise}{\vR}
|
|
|
#
|
|
|
-# \newcommand{\ssmDyn}{f}
|
|
|
-# \newcommand{\ssmObs}{h}
|
|
|
+# \newcommand{\ssmDynFn}{f}
|
|
|
+# \newcommand{\ssmObsFn}{h}
|
|
|
+#
|
|
|
+#
|
|
|
+# %%%
|
|
|
+# \newcommand{\gauss}{\mathcal{N}}
|
|
|
+#
|
|
|
+# \newcommand{\diag}{\mathrm{diag}}
|
|
|
# ```
|
|
|
#
|
|
|
|
|
@@ -264,7 +270,7 @@ def print_source(fname):
|
|
|
# Formally we can define an SSM
|
|
|
# as the following joint distribution:
|
|
|
# ```{math}
|
|
|
-# :label: SSMfull
|
|
|
+# :label: eq:SSM-ar
|
|
|
# p(\hmmobs_{1:T},\hmmhid_{1:T}|\inputs_{1:T})
|
|
|
# = \left[ p(\hmmhid_1|\inputs_1) \prod_{t=2}^{T}
|
|
|
# p(\hmmhid_t|\hmmhid_{t-1},\inputs_t) \right]
|
|
@@ -286,21 +292,29 @@ def print_source(fname):
|
|
|
# Illustration of an SSM as a graphical model.
|
|
|
# ```
|
|
|
#
|
|
|
-# We often consider a simpler setting in which there
|
|
|
-# are no external inputs,
|
|
|
-# and the observations are conditionally independent of each other
|
|
|
+#
|
|
|
+# We often consider a simpler setting in which the
|
|
|
+# observations are conditionally independent of each other
|
|
|
# (rather than having Markovian dependencies) given the hidden state.
|
|
|
# In this case the joint simplifies to
|
|
|
# ```{math}
|
|
|
-# :label: SSMsimplified
|
|
|
+# :label: eq:SSM-input
|
|
|
+# p(\hmmobs_{1:T},\hmmhid_{1:T}|\inputs_{1:T})
|
|
|
+# = \left[ p(\hmmhid_1|\inputs_1) \prod_{t=2}^{T}
|
|
|
+# p(\hmmhid_t|\hmmhid_{t-1},\inputs_t) \right]
|
|
|
+# \left[ \prod_{t=1}^T p(\hmmobs_t|\hmmhid_t, \inputs_t) \right]
|
|
|
+# ```
|
|
|
+# Sometimes there are no external inputs, so the model further
|
|
|
+# simplifies to the following unconditional generative model:
|
|
|
+# ```{math}
|
|
|
+# :label: eq:SSM-no-input
|
|
|
# p(\hmmobs_{1:T},\hmmhid_{1:T})
|
|
|
# = \left[ p(\hmmhid_1) \prod_{t=2}^{T}
|
|
|
# p(\hmmhid_t|\hmmhid_{t-1}) \right]
|
|
|
-# \left[ \prod_{t=1}^T p(\hmmobs_t|\hmmhid_t \right]
|
|
|
+# \left[ \prod_{t=1}^T p(\hmmobs_t|\hmmhid_t) \right]
|
|
|
# ```
|
|
|
# See {numref}`Figure %s <ssm-simplified>`
|
|
|
# for an illustration of the corresponding graphical model.
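+#
+# The factorization in {eq}`eq:SSM-no-input` suggests a simple way to simulate
+# from the model (ancestral sampling): draw $\hmmhid_1$ from its prior, then
+# alternately sample the next hidden state and the corresponding observation.
+# Below is a minimal sketch; `sample_init`, `sample_transition` and
+# `sample_observation` are hypothetical placeholders standing in for the
+# conditional distributions in the equation.
+# ```python
+# import jax
+#
+# def ancestral_sample(key, sample_init, sample_transition, sample_observation, num_steps):
+#     # Draw (z_{1:T}, y_{1:T}) from the joint distribution above.
+#     keys = jax.random.split(key, 2 * num_steps)
+#     z = sample_init(keys[0])
+#     zs, ys = [z], [sample_observation(keys[1], z)]
+#     for t in range(1, num_steps):
+#         z = sample_transition(keys[2 * t], z)
+#         zs.append(z)
+#         ys.append(sample_observation(keys[2 * t + 1], z))
+#     return zs, ys
+# ```
+#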
|
|
|
-# Compare {eq}`SSMfull` and {eq}`SSMsimplified`.
|
|
|
#
|
|
|
#
|
|
|
# ```{figure} /figures/SSM-simplified.png
|
|
@@ -445,21 +459,441 @@ print_source(hmm.sample)
|
|
|
# affect how we choose to gamble our money.
|
|
|
# We discuss various ways to perform this inference below.
|
|
|
|
|
|
+# (sec:lillypad)=
|
|
|
+# ## Example: Lily pad HMM
|
|
|
+#
|
|
|
+#
|
|
|
+# If $\obs_t$ is continuous, it is common to use a Gaussian
|
|
|
+# observation model:
|
|
|
+# ```{math}
|
|
|
+# p(\obs_t|\hidden_t=j) = \gauss(\obs_t|\vmu_j,\vSigma_j)
|
|
|
+# ```
|
|
|
+# This is sometimes called a Gaussian HMM.
|
|
|
+#
|
|
|
+# As a simple example, suppose we have an HMM with 3 hidden states,
|
|
|
+# each of which has a 2d Gaussian observation distribution.
|
|
|
+# We can represent these Gaussian distributions as 2d ellipses,
|
|
|
+# as we show below.
|
|
|
+# We call these ``lily pads'' because of their shape.
|
|
|
+# We can imagine a frog hopping from one lily pad to another.
|
|
|
+# (This analogy is due to the late Sam Roweis.)
|
|
|
+# The frog will stay on a pad for a while (corresponding to remaining in the same
|
|
|
+# discrete state $\hidden_t$), and then jump to a new pad
|
|
|
+# (corresponding to a transition to a new state).
|
|
|
+# The data we see are just the 2d points (e.g., water droplets)
|
|
|
+# coming from near the pad that the frog is currently on.
|
|
|
+# Thus this model is like a Gaussian mixture model,
|
|
|
+# in that it generates clusters of observations,
|
|
|
+# except now there is temporal correlation between the data points.
|
|
|
+#
|
|
|
+# Let us now illustrate this model in code.
|
|
|
+#
|
|
|
+#
|
|
|
+
|
|
|
+# In[7]:
|
|
|
+
|
|
|
+
|
|
|
+# Let us create the model
|
|
|
+
|
|
|
+initial_probs = jnp.array([0.3, 0.2, 0.5])
|
|
|
+
|
|
|
+# transition matrix
|
|
|
+A = jnp.array([
|
|
|
+[0.3, 0.4, 0.3],
|
|
|
+[0.1, 0.6, 0.3],
|
|
|
+[0.2, 0.3, 0.5]
|
|
|
+])
|
|
|
+
|
|
|
+# Observation model
|
|
|
+mu_collection = jnp.array([
|
|
|
+[0.3, 0.3],
|
|
|
+[0.8, 0.5],
|
|
|
+[0.3, 0.8]
|
|
|
+])
|
|
|
+
|
|
|
+S1 = jnp.array([[1.1, 0], [0, 0.3]])
|
|
|
+S2 = jnp.array([[0.3, -0.5], [-0.5, 1.3]])
|
|
|
+S3 = jnp.array([[0.8, 0.4], [0.4, 0.5]])
|
|
|
+cov_collection = jnp.array([S1, S2, S3]) / 60
|
|
|
+
|
|
|
+
|
|
|
+import tensorflow_probability as tfp
|
|
|
+
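+# The first branch below (pure distrax MVN) is disabled; we instead use the
+# TFP (JAX substrate) multivariate normal wrapped as a distrax distribution.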
|
|
|
+if False:
|
|
|
+ hmm = HMM(trans_dist=distrax.Categorical(probs=A),
|
|
|
+ init_dist=distrax.Categorical(probs=initial_probs),
|
|
|
+ obs_dist=distrax.MultivariateNormalFullCovariance(
|
|
|
+ loc=mu_collection, covariance_matrix=cov_collection))
|
|
|
+else:
|
|
|
+ hmm = HMM(trans_dist=distrax.Categorical(probs=A),
|
|
|
+ init_dist=distrax.Categorical(probs=initial_probs),
|
|
|
+ obs_dist=distrax.as_distribution(
|
|
|
+ tfp.substrates.jax.distributions.MultivariateNormalFullCovariance(loc=mu_collection,
|
|
|
+ covariance_matrix=cov_collection)))
|
|
|
+
|
|
|
+print(hmm)
|
|
|
+
|
|
|
+
|
|
|
+# In[8]:
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+n_samples, seed = 50, 10
|
|
|
+samples_state, samples_obs = hmm.sample(seed=PRNGKey(seed), seq_len=n_samples)
|
|
|
+
|
|
|
+print(samples_state.shape)
|
|
|
+print(samples_obs.shape)
|
|
|
+
|
|
|
+
|
|
|
+# In[9]:
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+# Let's plot the observed data in 2d
|
|
|
+xmin, xmax = 0, 1
|
|
|
+ymin, ymax = 0, 1.2
|
|
|
+colors = ["tab:green", "tab:blue", "tab:red"]
|
|
|
+
|
|
|
+def plot_2dhmm(hmm, samples_obs, samples_state, colors, ax, xmin, xmax, ymin, ymax, step=1e-2):
|
|
|
+ obs_dist = hmm.obs_dist
|
|
|
+ color_sample = [colors[i] for i in samples_state]
|
|
|
+
|
|
|
+ xs = jnp.arange(xmin, xmax, step)
|
|
|
+ ys = jnp.arange(ymin, ymax, step)
|
|
|
+
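+    # evaluate each state's observation density p(obs | state=k) on the 2d grid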
|
|
|
+ v_prob = vmap(lambda x, y: obs_dist.prob(jnp.array([x, y])), in_axes=(None, 0))
|
|
|
+ z = vmap(v_prob, in_axes=(0, None))(xs, ys)
|
|
|
+
|
|
|
+ grid = np.mgrid[xmin:xmax:step, ymin:ymax:step]
|
|
|
+
|
|
|
+ for k, color in enumerate(colors):
|
|
|
+ ax.contour(*grid, z[:, :, k], levels=[1], colors=color, linewidths=3)
|
|
|
+ ax.text(*(obs_dist.mean()[k] + 0.13), f"$k$={k + 1}", fontsize=13, horizontalalignment="right")
|
|
|
+
|
|
|
+ ax.plot(*samples_obs.T, c="black", alpha=0.3, zorder=1)
|
|
|
+ ax.scatter(*samples_obs.T, c=color_sample, s=30, zorder=2, alpha=0.8)
|
|
|
+
|
|
|
+ return ax, color_sample
|
|
|
+
|
|
|
+
|
|
|
+fig, ax = plt.subplots()
|
|
|
+_, color_sample = plot_2dhmm(hmm, samples_obs, samples_state, colors, ax, xmin, xmax, ymin, ymax)
|
|
|
+
|
|
|
+
|
|
|
+# In[10]:
|
|
|
+
|
|
|
+
|
|
|
+# Let's plot the hidden state sequence
|
|
|
+
|
|
|
+fig, ax = plt.subplots()
|
|
|
+ax.step(range(n_samples), samples_state, where="post", c="black", linewidth=1, alpha=0.3)
|
|
|
+ax.scatter(range(n_samples), samples_state, c=color_sample, zorder=3)
|
|
|
+
|
|
|
+
|
|
|
+# (sec:lds-intro)=
|
|
|
# # Linear Gaussian SSMs
|
|
|
#
|
|
|
-# Blah blah
|
|
|
+#
|
|
|
+# Consider the state space model in
|
|
|
+# {eq}`eq:SSM-ar`
|
|
|
+# where we assume the observations are conditionally iid given the
|
|
|
+# hidden states and inputs (i.e. there are no auto-regressive dependencies
|
|
|
+# between the observables).
|
|
|
+# We can rewrite this model as
|
|
|
+# a stochastic nonlinear dynamical system (NLDS)
|
|
|
+# by writing the next hidden state
|
|
|
+# as a deterministic function of the past state
|
|
|
+# plus random process noise $\vepsilon_t$
|
|
|
+# \begin{align}
|
|
|
+# \hmmhid_t &= \ssmDynFn(\hmmhid_{t-1}, \inputs_t, \vepsilon_t)
|
|
|
+# \end{align}
|
|
|
+# where $\vepsilon_t$ is drawn from a distribution chosen such
|
|
|
+# that the induced distribution
|
|
|
+# on $\hmmhid_t$ matches $p(\hmmhid_t|\hmmhid_{t-1}, \inputs_t)$.
|
|
|
+# Similarly, we can write each observation
|
|
|
+# as a deterministic function of the hidden state
|
|
|
+# plus observation noise $\veta_t$:
|
|
|
+# \begin{align}
|
|
|
+# \hmmobs_t &= \ssmObsFn(\hmmhid_{t}, \inputs_t, \veta_t)
|
|
|
+# \end{align}
|
|
|
+#
|
|
|
+#
|
|
|
+# If we assume additive Gaussian noise,
|
|
|
+# the model becomes
|
|
|
+# \begin{align}
|
|
|
+# \hmmhid_t &= \ssmDynFn(\hmmhid_{t-1}, \inputs_t) + \vepsilon_t \\
|
|
|
+# \hmmobs_t &= \ssmObsFn(\hmmhid_{t}, \inputs_t) + \veta_t
|
|
|
+# \end{align}
|
|
|
+# where $\vepsilon_t \sim \gauss(\vzero,\vQ_t)$
|
|
|
+# and $\veta_t \sim \gauss(\vzero,\vR_t)$.
|
|
|
+# We will call these Gaussian SSMs.
|
|
|
+#
|
|
|
+# If we additionally assume
|
|
|
+# the transition function $\ssmDynFn$
|
|
|
+# and the observation function $\ssmObsFn$ are both linear,
|
|
|
+# then we can rewrite the model as follows:
|
|
|
+# \begin{align}
|
|
|
+# p(\hmmhid_t|\hmmhid_{t-1},\inputs_t) &= \gauss(\hmmhid_t|\ldsDyn_t \hmmhid_{t-1}
|
|
|
+# + \ldsDynIn_t \inputs_t, \vQ_t)
|
|
|
+# \\
|
|
|
+# p(\hmmobs_t|\hmmhid_t,\inputs_t) &= \gauss(\hmmobs_t|\ldsObs_t \hmmhid_{t}
|
|
|
+# + \ldsObsIn_t \inputs_t, \vR_t)
|
|
|
+# \end{align}
|
|
|
+# This is called a
|
|
|
+# linear-Gaussian state space model
|
|
|
+# (LG-SSM),
|
|
|
+# or a
|
|
|
+# linear dynamical system (LDS).
|
|
|
+# We usually assume the parameters are independent of time, in which case
|
|
|
+# the model is said to be time-invariant or homogeneous.
|
|
|
+#
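+#
+# To make the notation concrete, here is a minimal sketch of sampling a
+# trajectory from a time-invariant LG-SSM with no inputs. The function and
+# argument names are illustrative (they are not part of any particular
+# library); `A`, `C`, `Q`, `R` play the roles of $\ldsDyn$, $\ldsObs$,
+# $\vQ$, $\vR$ above.
+# ```python
+# import jax
+# import jax.numpy as jnp
+#
+# def sample_lgssm(key, A, C, Q, R, mu0, Sigma0, num_steps):
+#     # z_t = A z_{t-1} + eps_t,   y_t = C z_t + eta_t
+#     key, init_key = jax.random.split(key)
+#     z = jax.random.multivariate_normal(init_key, mu0, Sigma0)
+#     zs, ys = [], []
+#     for _ in range(num_steps):
+#         key, obs_key, dyn_key = jax.random.split(key, 3)
+#         zs.append(z)
+#         ys.append(C @ z + jax.random.multivariate_normal(obs_key, jnp.zeros(R.shape[0]), R))
+#         z = A @ z + jax.random.multivariate_normal(dyn_key, jnp.zeros(Q.shape[0]), Q)
+#     return jnp.stack(zs), jnp.stack(ys)
+# ```
+#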
|
|
|
|
|
|
# (sec:tracking-lds)=
|
|
|
-# ## Example: model for 2d tracking
|
|
|
+# (sec:kalman-tracking)=
|
|
|
+# ## Example: tracking a 2d point
|
|
|
+#
|
|
|
+#
|
|
|
+#
|
|
|
+# % Sarkka p43
|
|
|
+# Consider an object moving in $\real^2$.
|
|
|
+# Let the state be
|
|
|
+# the position and velocity of the object,
|
|
|
+# $\vz_t =\begin{pmatrix} u_t & \dot{u}_t & v_t & \dot{v}_t \end{pmatrix}^{\top}$.
|
|
|
+# (We use $u$ and $v$ for the two coordinates,
|
|
|
+# to avoid confusion with the state and observation variables.)
|
|
|
+# If we use Euler discretization,
|
|
|
+# the dynamics become
|
|
|
+# \begin{align}
|
|
|
+# \underbrace{\begin{pmatrix} u_t\\ \dot{u}_t \\ v_t \\ \dot{v}_t \end{pmatrix}}_{\vz_t}
|
|
|
+# =
|
|
|
+# \underbrace{
|
|
|
+# \begin{pmatrix}
|
|
|
+# 1 & \Delta & 0 & 0 \\
|
|
|
+# 0 & 1 & 0 & 0 \\
|
|
|
+# 0 & 0 & 1 & \Delta \\
|
|
|
+# 0 & 0 & 0 & 1
|
|
|
+# \end{pmatrix}
|
|
|
+# }_{\ldsDyn}
|
|
|
+# \
|
|
|
+# \underbrace{\begin{pmatrix} u_{t-1} \\ \dot{u}_{t-1} \\ v_{t-1} \\ \dot{v}_{t-1} \end{pmatrix}}_{\vz_{t-1}}
|
|
|
+# + \vepsilon_t
|
|
|
+# \end{align}
|
|
|
+# where $\vepsilon_t \sim \gauss(\vzero,\vQ)$ is
|
|
|
+# the process noise.
|
|
|
+#
|
|
|
+# Let us assume
|
|
|
+# that the process noise is
|
|
|
+# a white noise process added to the velocity components
|
|
|
+# of the state, but not to the location.
|
|
|
+# (This is known as a random accelerations model.)
|
|
|
+# We can approximate the resulting process in discrete time by assuming
|
|
|
+# $\vQ = \diag(0, q, 0, q)$.
|
|
|
+# (See {cite}`Sarkka13` p60 for a more accurate way
|
|
|
+# to convert the continuous time process to discrete time.)
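+#
+# For concreteness, here is one way to build the dynamics matrix and process
+# noise covariance described above, using the state ordering
+# $(u, \dot{u}, v, \dot{v})$ from the equations (the values of `delta` and `q`
+# are illustrative; the code further below uses a different, but equivalent,
+# state ordering):
+# ```python
+# import jax.numpy as jnp
+#
+# delta, q = 1.0, 0.01  # time step and velocity noise variance (illustrative)
+# A = jnp.array([[1., delta, 0., 0.],
+#                [0., 1.,    0., 0.],
+#                [0., 0.,    1., delta],
+#                [0., 0.,    0., 1.]])
+# Q = jnp.diag(jnp.array([0., q, 0., q]))
+# ```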
|
|
|
+#
|
|
|
+#
|
|
|
+# Now suppose that at each discrete time point we
|
|
|
+# observe the location,
|
|
|
+# corrupted by Gaussian noise.
|
|
|
+# Thus the observation model becomes
|
|
|
+# \begin{align}
|
|
|
+# \underbrace{\begin{pmatrix} y_{1,t} \\ y_{2,t} \end{pmatrix}}_{\vy_t}
|
|
|
+# &=
|
|
|
+# \underbrace{
|
|
|
+# \begin{pmatrix}
|
|
|
+# 1 & 0 & 0 & 0 \\
|
|
|
+# 0 & 0 & 1 & 0
|
|
|
+# \end{pmatrix}
|
|
|
+# }_{\ldsObs}
|
|
|
+# \
|
|
|
+# \underbrace{\begin{pmatrix} u_t\\ \dot{u}_t \\ v_t \\ \dot{v}_t \end{pmatrix}}_{\vz_t}
|
|
|
+# + \veta_t
|
|
|
+# \end{align}
|
|
|
+# where $\veta_t \sim \gauss(\vzero,\vR)$ is the \keywordDef{observation noise}.
|
|
|
+# We see that the observation matrix $\ldsObs$ simply ``extracts'' the
|
|
|
+# relevant parts of the state vector.
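+#
+# For example, with the state ordering used in these equations, multiplying
+# by the observation matrix simply picks out the two position components
+# (a tiny illustration, not library code):
+# ```python
+# import jax.numpy as jnp
+#
+# C = jnp.array([[1., 0., 0., 0.],
+#                [0., 0., 1., 0.]])
+# z = jnp.array([2.0, 0.5, 3.0, -0.1])  # (u, u_dot, v, v_dot)
+# print(C @ z)                          # [2. 3.], i.e. the (u, v) position
+# ```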
|
|
|
+#
|
|
|
+# Suppose we sample a trajectory and corresponding set
|
|
|
+# of noisy observations from this model,
|
|
|
+# $(\vz_{1:T}, \vy_{1:T}) \sim p(\vz,\vy|\vtheta)$.
|
|
|
+# (We use diagonal observation noise,
|
|
|
+# $\vR = \diag(\sigma_1^2, \sigma_2^2)$.)
|
|
|
+# The results are shown below.
|
|
|
+#
|
|
|
+
|
|
|
+# In[11]:
|
|
|
+
|
|
|
+
|
|
|
+key = jax.random.PRNGKey(314)
|
|
|
+timesteps = 15
|
|
|
+delta = 1.0
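+
+# NOTE: this code orders the state as (u, v, u_dot, v_dot), i.e. positions then
+# velocities, so A and C are permuted versions of the matrices in the text above
+# (which uses the ordering (u, u_dot, v, v_dot)); the process noise Q is also
+# taken to be a small isotropic matrix rather than diag(0, q, 0, q).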
|
|
|
+A = jnp.array([
|
|
|
+ [1, 0, delta, 0],
|
|
|
+ [0, 1, 0, delta],
|
|
|
+ [0, 0, 1, 0],
|
|
|
+ [0, 0, 0, 1]
|
|
|
+])
|
|
|
+
|
|
|
+C = jnp.array([
|
|
|
+ [1, 0, 0, 0],
|
|
|
+ [0, 1, 0, 0]
|
|
|
+])
|
|
|
+
|
|
|
+state_size, _ = A.shape
|
|
|
+observation_size, _ = C.shape
|
|
|
+
|
|
|
+Q = jnp.eye(state_size) * 0.001
|
|
|
+R = jnp.eye(observation_size) * 1.0
|
|
|
+# Prior distribution over the initial state
|
|
|
+mu0 = jnp.array([8, 10, 1, 0]).astype(float)
|
|
|
+Sigma0 = jnp.eye(state_size) * 1.0
|
|
|
+
|
|
|
+from jsl.lds.kalman_filter import LDS, smooth, filter
|
|
|
+
|
|
|
+lds = LDS(A, C, Q, R, mu0, Sigma0)
|
|
|
+print(lds)
|
|
|
+
|
|
|
+
|
|
|
+# In[12]:
|
|
|
+
|
|
|
+
|
|
|
+from jsl.demos.plot_utils import plot_ellipse
|
|
|
+
|
|
|
+def plot_tracking_values(observed, filtered, cov_hist, signal_label, ax):
|
|
|
+ timesteps, _ = observed.shape
|
|
|
+ ax.plot(observed[:, 0], observed[:, 1], marker="o", linewidth=0,
|
|
|
+ markerfacecolor="none", markeredgewidth=2, markersize=8, label="observed", c="tab:green")
|
|
|
+ ax.plot(*filtered[:, :2].T, label=signal_label, c="tab:red", marker="x", linewidth=2)
|
|
|
+ for t in range(0, timesteps, 1):
|
|
|
+ covn = cov_hist[t][:2, :2]
|
|
|
+ plot_ellipse(covn, filtered[t, :2], ax, n_std=2.0, plot_center=False)
|
|
|
+ ax.axis("equal")
|
|
|
+ ax.legend()
|
|
|
+
|
|
|
+
|
|
|
+# In[13]:
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+z_hist, x_hist = lds.sample(key, timesteps)
|
|
|
+
|
|
|
+fig_truth, axs = plt.subplots()
|
|
|
+axs.plot(x_hist[:, 0], x_hist[:, 1],
|
|
|
+ marker="o", linewidth=0, markerfacecolor="none",
|
|
|
+ markeredgewidth=2, markersize=8,
|
|
|
+ label="observed", c="tab:green")
|
|
|
+
|
|
|
+axs.plot(z_hist[:, 0], z_hist[:, 1],
|
|
|
+ linewidth=2, label="truth",
|
|
|
+ marker="s", markersize=8)
|
|
|
+axs.legend()
|
|
|
+axs.axis("equal")
|
|
|
+
|
|
|
+
|
|
|
+# The main task is to infer the hidden states given the noisy
|
|
|
+# observations, i.e., $p(\vz|\vy,\vtheta)$. We discuss the topic of inference in {ref}`sec:inference`.
|
|
|
+
|
|
|
+# (sec:nlds-intro)=
|
|
|
+# # Nonlinear Gaussian SSMs
|
|
|
+#
|
|
|
+# In this section, we consider SSMs in which the dynamics and/or observation models are nonlinear,
|
|
|
+# but the process noise and observation noise are Gaussian.
|
|
|
+# That is,
|
|
|
+# \begin{align}
|
|
|
+# \hmmhid_t &= \ssmDynFn(\hmmhid_{t-1}, \inputs_t) + \vepsilon_t \\
|
|
|
+# \hmmobs_t &= \ssmObsFn(\hmmhid_{t}, \inputs_t) + \veta_t
|
|
|
+# \end{align}
|
|
|
+# where $\vepsilon_t \sim \gauss(\vzero,\vQ_t)$
|
|
|
+# and $\veta_t \sim \gauss(\vzero,\vR_t)$.
|
|
|
+# This is a very widely used model class. We give some examples below.
|
|
|
+
|
|
|
+# (sec:pendulum)=
|
|
|
+# ## Example: tracking a 1d pendulum
|
|
|
+#
|
|
|
+# ```{figure} /figures/pendulum.png
|
|
|
+# :scale: 100%
|
|
|
+# :name: fig:pendulum
|
|
|
+#
|
|
|
+# Illustration of a pendulum swinging.
|
|
|
+# $g$ is the force of gravity,
|
|
|
+# $w(t)$ is a random external force,
|
|
|
+# and $\alpha$ is the angle wrt the vertical.
|
|
|
+# Based on {cite}`Sarkka13` fig 3.10.
|
|
|
+#
|
|
|
+# ```
|
|
|
+#
|
|
|
+#
|
|
|
+# % Sarkka p45, p74
|
|
|
+# Consider a simple pendulum of unit mass and length swinging from
|
|
|
+# a fixed attachment, as in {ref}`fig:pendulum`.
|
|
|
+# Such an object is in principle entirely deterministic in its behavior.
|
|
|
+# However, in the real world, there are often unknown forces at work
|
|
|
+# (e.g., air turbulence, friction).
|
|
|
+# We will model these by a continuous time random Gaussian noise process $w(t)$.
|
|
|
+# This gives rise to the following differential equation:
|
|
|
+# \begin{align}
|
|
|
+# \frac{d^2 \alpha}{d t^2}
|
|
|
+# = -g \sin(\alpha) + w(t)
|
|
|
+# \end{align}
|
|
|
+# We can write this as a nonlinear SSM by defining the state to be
|
|
|
+# $z_1(t) = \alpha(t)$ and $z_2(t) = d\alpha(t)/dt$.
|
|
|
+# Thus
|
|
|
+# \begin{align}
|
|
|
+# \frac{d \vz}{dt}
|
|
|
+# = \begin{pmatrix} z_2 \\ -g \sin(z_1) \end{pmatrix}
|
|
|
+# + \begin{pmatrix} 0 \\ 1 \end{pmatrix} w(t)
|
|
|
+# \end{align}
|
|
|
+# If we discretize this with step size $\Delta$,
|
|
|
+# we get the following
|
|
|
+# formulation {cite}`Sarkka13` p74:
|
|
|
+# \begin{align}
|
|
|
+# \underbrace{
|
|
|
+# \begin{pmatrix} z_{1,t} \\ z_{2,t} \end{pmatrix}
|
|
|
+# }_{\hmmhid_t}
|
|
|
+# =
|
|
|
+# \underbrace{
|
|
|
+# \begin{pmatrix} z_{1,t-1} + z_{2,t-1} \Delta \\
|
|
|
+# z_{2,t-1} -g \sin(z_{1,t-1}) \Delta \end{pmatrix}
|
|
|
+# }_{\vf(\hmmhid_{t-1})}
|
|
|
+# +\vq_{t-1}
|
|
|
+# \end{align}
|
|
|
+# where $\vq_{t-1} \sim \gauss(\vzero,\vQ)$ with
|
|
|
+# \begin{align}
|
|
|
+# \vQ = q^c \begin{pmatrix}
|
|
|
+# \frac{\Delta^3}{3} & \frac{\Delta^2}{2} \\
|
|
|
+# \frac{\Delta^2}{2} & \Delta
|
|
|
+# \end{pmatrix}
|
|
|
+# \end{align}
|
|
|
+# where $q^c$ is the spectral density (continuous time variance)
|
|
|
+# of the continuous-time noise process.
|
|
|
+#
|
|
|
+#
|
|
|
+# If we observe the angular position, we
|
|
|
+# get the linear observation model
|
|
|
+# \begin{align}
|
|
|
+# y_t = \alpha_t + r_t = h(\hmmhid_t) + r_t
|
|
|
+# \end{align}
|
|
|
+# where $h(\hmmhid_t) = z_{1,t}$
|
|
|
+# and $r_t$ is the observation noise.
|
|
|
+# If we only observe the horizontal position,
|
|
|
+# we get the nonlinear observation model
|
|
|
+# \begin{align}
|
|
|
+# y_t = \sin(\alpha_t) + r_t = h(\hmmhid_t) + r_t
|
|
|
+# \end{align}
|
|
|
+# where $h(\hmmhid_t) = \sin(z_{1,t})$.
|
|
|
+#
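+# A minimal sketch of the discretized dynamics and the two observation
+# functions in code (the names and constants are illustrative, not taken
+# from any particular library):
+# ```python
+# import jax.numpy as jnp
+#
+# g, delta = 9.81, 0.01      # gravity and step size (illustrative values)
+# q_c = 0.1                  # spectral density of the noise (illustrative)
+#
+# def pendulum_dynamics(z):
+#     # z = (alpha, dalpha/dt); one Euler step of the pendulum ODE
+#     return jnp.array([z[0] + z[1] * delta,
+#                       z[1] - g * jnp.sin(z[0]) * delta])
+#
+# def observe_angle(z):
+#     return z[0]            # linear observation model h(z) = z_1
+#
+# def observe_horizontal(z):
+#     return jnp.sin(z[0])   # nonlinear observation model h(z) = sin(z_1)
+#
+# Q = q_c * jnp.array([[delta**3 / 3, delta**2 / 2],
+#                      [delta**2 / 2, delta]])
+# ```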
|
|
|
+#
|
|
|
+#
|
|
|
+#
|
|
|
+#
|
|
|
#
|
|
|
-# Blah blah
|
|
|
|
|
|
# (sec:inference)=
|
|
|
# # Inferential goals
|
|
|
#
|
|
|
-# ```{figure} /figures/dbn-inference-problems.png
|
|
|
+# ```{figure} /figures/dbn-inference-problems-tikz.png
|
|
|
# :scale: 100%
|
|
|
-# :name: dbn-inference
|
|
|
+# :name: fig:dbn-inference
|
|
|
#
|
|
|
# Illustration of the main kinds of inference for state-space models.
|
|
|
|
|
@@ -503,7 +937,7 @@ print_source(hmm.sample)
|
|
|
# \cdots
|
|
|
# p(\hmmhid_{t+h}|\hmmhid_{t+h-1})
|
|
|
# \end{align}
|
|
|
-# See \cref{fig:dbn_inf_problems} for a summary of these distributions.
|
|
|
+# See {ref}`fig:dbn-inference` for a summary of these distributions.
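+#
+# For a discrete-state model (an HMM), this prediction step amounts to pushing
+# the filtered belief state through the transition matrix $h$ times.
+# A minimal sketch (assuming a row-stochastic transition matrix; the function
+# and variable names are illustrative):
+# ```python
+# import jax.numpy as jnp
+#
+# def predict_ahead(filtered_belief, trans_mat, horizon):
+#     # p(z_{t+h} | y_{1:t}): apply the transition matrix h times to the belief
+#     belief = filtered_belief
+#     for _ in range(horizon):
+#         belief = belief @ trans_mat
+#     return belief
+# ```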
|
|
|
#
|
|
|
# In addition to computing posterior marginals,
|
|
|
# we may want to compute the most probable hidden sequence,
|
|
@@ -530,7 +964,7 @@ print_source(hmm.sample)
|
|
|
# to the casino HMM from {ref}`sec:casino`.
|
|
|
#
|
|
|
|
|
|
-# In[7]:
|
|
|
+# In[14]:
|
|
|
|
|
|
|
|
|
# Call inference engine
|
|
@@ -539,7 +973,7 @@ filtered_dist, _, smoothed_dist, loglik = hmm.forward_backward(x_hist)
|
|
|
map_path = hmm.viterbi(x_hist)
|
|
|
|
|
|
|
|
|
-# In[8]:
|
|
|
+# In[15]:
|
|
|
|
|
|
|
|
|
# Find the spans of timesteps during which the simulated system is in state 1
|
|
@@ -555,7 +989,7 @@ def find_dishonest_intervals(z_hist):
|
|
|
return spans
|
|
|
|
|
|
|
|
|
-# In[9]:
|
|
|
+# In[16]:
|
|
|
|
|
|
|
|
|
# Plot posterior
|
|
@@ -576,7 +1010,7 @@ def plot_inference(inference_values, z_hist, ax, state=1, map_estimate=False):
|
|
|
ax.set_xlabel("Observation number")
|
|
|
|
|
|
|
|
|
-# In[10]:
|
|
|
+# In[17]:
|
|
|
|
|
|
|
|
|
# Filtering
|
|
@@ -589,7 +1023,7 @@ ax.set_title("Filtered")
|
|
|
|
|
|
|
|
|
|
|
|
-# In[11]:
|
|
|
+# In[18]:
|
|
|
|
|
|
|
|
|
# Smoothing
|
|
@@ -602,7 +1036,7 @@ ax.set_title("Smoothed")
|
|
|
|
|
|
|
|
|
|
|
|
-# In[12]:
|
|
|
+# In[ ]:
|
|
|
|
|
|
|
|
|
# MAP estimation
|
|
@@ -612,7 +1046,7 @@ ax.set_ylabel("MAP state")
|
|
|
ax.set_title("Viterbi")
|
|
|
|
|
|
|
|
|
-# In[13]:
|
|
|
+# In[ ]:
|
|
|
|
|
|
|
|
|
# TODO: posterior samples
|