Variational Inference - Monte Carlo ELBO in PyTorch
Black Box Variational Inference in PyTorch
This post is an analogue of my recent post using the Monte Carlo ELBO estimate, but this time in PyTorch. I have heard lots of good things about PyTorch but haven't had much opportunity to use it, so this blog post is a simple implementation of a common VI method in PyTorch.
In [1]:
import numpy as np
import torch
from torch.autograd import Variable
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')
Start by generating the same problem as before: a simple noisy linear relationship.
In [2]:
N = 200
X = np.random.uniform(low=-50, high=50, size=(N, 1))
T = 3.2*X + np.random.normal(size=(N, 1), scale=5.5)
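In model terms this is a one-parameter Bayesian linear regression with known noise. Written out (the prior moments are the ones set inside the model class below):

$$t_n = w\,x_n + \epsilon_n, \qquad \epsilon_n \sim \mathcal{N}(0,\ 5.5^2), \qquad w \sim \mathcal{N}(0.9,\ 1),$$

and the goal is to approximate the posterior $p(w \mid X, t)$ with a Gaussian $q(w)$.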
In [3]:
plt.scatter(X, T)
Out[3]: [scatter plot of the noisy linear data]
In [4]:
def log_norm(x, mu, std):
    """Compute the log pdf of x,
    under a normal distribution with mean mu and standard deviation std."""
    return -0.5 * torch.log(2 * np.pi * std**2) - 0.5 * (1 / std**2) * (x - mu)**2
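As a quick sanity check (not part of the original notebook), log_norm can be compared against scipy.stats.norm.logpdf; the two should agree to floating-point precision:

from scipy.stats import norm

# Quick check: log_norm should match scipy's logpdf on a few values.
x_check = torch.FloatTensor([[-1.0], [0.5], [2.0]])
mu_check = torch.FloatTensor([0.0])
std_check = torch.FloatTensor([1.5])
print(log_norm(x_check, mu_check, std_check).numpy().ravel())
print(norm(loc=0.0, scale=1.5).logpdf(x_check.numpy()).ravel())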
In [5]:
class MCElbo(torch.nn.Module):
    def __init__(self):
        super(MCElbo, self).__init__()
        self.n_latent = 100  # Number of latent samples
        self.softplus = torch.nn.Softplus()
        # The variational parameters we adjust during training.
        self.qm = torch.nn.Parameter(torch.randn(1, 1), requires_grad=True)
        self.qs = torch.nn.Parameter(torch.randn(1, 1), requires_grad=True)
        # Create holders for the prior mean and std, and the likelihood std.
        self.prior_m = Variable(torch.randn(1, 1), requires_grad=False)
        self.prior_s = Variable(torch.randn(1, 1), requires_grad=False)
        self.likelihood_s = Variable(torch.FloatTensor(1), requires_grad=False)
        # Set the prior and likelihood moments.
        self.prior_s.data.fill_(1.0)
        self.prior_m.data.fill_(0.9)
        self.likelihood_s.data.fill_(5.5)

    def generate_rand(self):
        return np.random.normal(size=(self.n_latent, 1))

    def reparam(self, eps):
        # Reparameterisation trick: z = qm + softplus(qs) * eps.
        eps = Variable(torch.FloatTensor(eps))
        return eps.mul(self.softplus(self.qs)).add(self.qm)

    def compute_elbo(self, x, t):
        eps = self.generate_rand()
        z = self.reparam(eps)
        # Monte Carlo estimates of E_q[log q(z)] and E_q[log p(z)].
        q_likelihood = torch.mean(log_norm(z, self.qm, self.softplus(self.qs)))
        prior = torch.mean(log_norm(z, self.prior_m, self.prior_s))
        # Expected log-likelihood: sum over data points, average over latent samples.
        likelihood = torch.mean(torch.sum(log_norm(t, x * z.transpose(0, 1),
                                                   self.likelihood_s), 0))
        kld_mc = q_likelihood - prior
        elbo = likelihood - kld_mc
        return elbo
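In equations, compute_elbo returns a Monte Carlo estimate of the ELBO, using the reparameterisation trick with $L$ = n_latent samples:

$$\widehat{\mathrm{ELBO}} = \frac{1}{L}\sum_{i=1}^{L}\sum_{n=1}^{N}\log p\big(t_n \mid x_n, z^{(i)}\big) \;-\; \frac{1}{L}\sum_{i=1}^{L}\Big[\log q\big(z^{(i)}\big) - \log p\big(z^{(i)}\big)\Big],$$

$$z^{(i)} = q_m + \mathrm{softplus}(q_s)\,\epsilon^{(i)}, \qquad \epsilon^{(i)} \sim \mathcal{N}(0, 1).$$

The second term is the Monte Carlo estimate of $\mathrm{KL}\big(q(z)\,\|\,p(z)\big)$, which is why no closed-form KL is needed.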
In [6]:
c = MCElbo()
optimizer = torch.optim.Adam(c.parameters(), lr=0.2)
x = Variable(torch.Tensor(X), requires_grad=False)
t = Variable(torch.Tensor(T), requires_grad=False)

for i in range(2001):
    loss = -c.compute_elbo(x, t)  # Minimise the negative ELBO.
    optimizer.zero_grad()
    loss.backward(retain_graph=True)
    optimizer.step()
    if i % 250 == 0:
        print(c.qm.data.numpy(), (c.softplus(c.qs).data**2).numpy())
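For comparison, this model is conjugate, so the exact posterior over the slope is available in closed form. With prior $\mathcal{N}(m_0 = 0.9,\ s_0^2 = 1)$ and noise variance $\sigma^2 = 5.5^2$, the posterior is Gaussian with

$$\sigma_N^2 = \left(\frac{1}{s_0^2} + \frac{1}{\sigma^2} X^\top X\right)^{-1}, \qquad \mu_N = \sigma_N^2 \left(\frac{m_0}{s_0^2} + \frac{1}{\sigma^2} X^\top t\right),$$

which is what the next two cells compute.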
In [7]:
analytical_posterior_var = ((1/5.5**2) * X.T @ X + 1)**-1
analytical_posterior_var
Out[7]:
In [8]:
analytical_posterior_mean = analytical_posterior_var * (0.9 + (1/5.5**2) * X.T @ T)
analytical_posterior_mean
Out[8]:
In [9]:
from scipy.stats import norm

xn = np.arange(3.1, 3.5, 0.0001)
true_dist = norm(loc=analytical_posterior_mean,
                 scale=analytical_posterior_var**0.5)
q_dist = norm(loc=c.qm.data.numpy(), scale=c.softplus(c.qs).data.numpy())
yn = true_dist.pdf(xn).ravel()

plt.plot(xn, yn, linewidth=3, label="True Posterior")
plt.plot(xn, q_dist.pdf(xn).ravel(), '--', linewidth=3,
         label="Approximation")
plt.legend()
Out[9]: [plot comparing the true posterior and the variational approximation]
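A quick numeric check of the same comparison (not in the original notebook), printing the fitted variational moments next to the analytical ones:

# Compare the learned q(w) moments with the analytical posterior moments.
print("q mean:    ", c.qm.data.numpy().ravel(),
      " analytical:", analytical_posterior_mean.ravel())
print("q variance:", (c.softplus(c.qs).data**2).numpy().ravel(),
      " analytical:", analytical_posterior_var.ravel())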
Summary
Thankfully, this performs almost identically to the TensorFlow version!