library(ggplot2)
heatmap = function(A){
n = nrow(A)
p = ncol(A)
df = data.frame(value = c(A), i = 1:n, j = rep(1:p, rep(n, p)))
ggplot(df, aes(j, i, fill = value)) +
geom_tile(color = "black")+
scale_fill_gradient2(low="#619CFF", mid="white", high="#F8766D")+
scale_y_reverse()+
theme_void()
}
Generate \(x\sim\mathcal{N}_2(0, I)\).
n = 1000
X = matrix(rnorm(2*n), n, 2)
colnames(X) = c("x1", "x2")
mu = c(0, 0)
axes = diag(2)
ggplot(data = data.frame(X), aes(x1, x2)) +
geom_point(alpha = 0.1)+
stat_ellipse(level = 0.8, color = "forestgreen", size = 1.5, type = "norm")+
stat_ellipse(level = 0.95, color = "forestgreen", size = 1.5, type = "norm")+
annotate("point", x = mu[1], y = mu[2], size = 3, color = "forestgreen")+
annotate("segment", x = mu[1], y = mu[2], xend = mu[1] + axes[1,1], yend = mu[2] + axes[2,1],
arrow = arrow(length = unit(0.5, "cm")), color = "forestgreen", size = 1.5)+
annotate("segment", x = mu[1], y = mu[2], xend = mu[1] + axes[1,2], yend = mu[2] + axes[2,2],
arrow = arrow(length = unit(0.5, "cm")), color = "forestgreen", size = 1.5)+
coord_fixed()
Generate \(x\sim\mathcal{N}_2(\mu, \Sigma)\).
mu = c(1,2)
U = matrix(c(1/sqrt(2), 1/sqrt(2), -1/sqrt(2), 1/sqrt(2)), 2, 2)
lambdas = c(0.2, 1)
Sigma = U %*% diag(lambdas) %*% t(U)
Sigmasqrt = U %*% diag(sqrt(lambdas)) %*% t(U)
X = t(Sigmasqrt %*% t(X) + mu)
colnames(X) = c("x1", "x2")
axes = U %*% diag(sqrt(lambdas))
plt = ggplot(data = data.frame(X), aes(x1, x2)) +
geom_point(alpha = 0.1)
plt +
stat_ellipse(level = 0.8, color = "forestgreen", size = 1.5, type = "norm")+
stat_ellipse(level = 0.95, color = "forestgreen", size = 1.5, type = "norm")+
annotate("point", x = mu[1], y = mu[2], size = 3, color = "forestgreen")+
annotate("segment", x = mu[1], y = mu[2], xend = mu[1] + axes[1,1], yend = mu[2] + axes[2,1],
arrow = arrow(length = unit(0.5, "cm")), color = "forestgreen", size = 1.5)+
annotate("segment", x = mu[1], y = mu[2], xend = mu[1] + axes[1,2], yend = mu[2] + axes[2,2],
arrow = arrow(length = unit(0.5, "cm")), color = "forestgreen", size = 1.5)+
coord_fixed()
Alternative way: generate MVN data via rmvnorm
.
library(mvtnorm)
X = rmvnorm(n, mean = mu, sigma = Sigma)
colnames(X) = c("x1", "x2")
plt+
geom_point(data = data.frame(X), aes(x1, x2), color = "red", alpha = 0.5)
Plot marginal distributions.
ggplot(X, aes(x = x1))+
geom_histogram(aes(y = ..density..))+
geom_density(color = "forestgreen", size = 1.5)+
geom_vline(aes(xintercept = mu[1]), color = "forestgreen", size = 1.5)
ggplot(X, aes(x = x2))+
geom_histogram(aes(y = ..density..))+
geom_density(color = "forestgreen", size = 1.5)+
geom_vline(aes(xintercept = mu[2]), color = "forestgreen", size = 1.5)
Add marginal distributions to the scatterplot plot.
library(ggExtra)
ggMarginal(plt, type="histogram")
Add conditional distributions to the scatterplot.
library(dplyr)
x1 = X[,1]; x2 = X[,2]
x1grid = seq(quantile(x1, 0.05), quantile(x1, 0.95), length.out = 5)
x2grid = seq(min(x2), max(x2), length.out = 100)
condMVN = function(x1, x2, mu, Sigma){
muc = mu[2] + Sigma[2,1]/Sigma[1,1] * (x1 - mu[1])
Sigmac = Sigma[2,2] - Sigma[1,2] * Sigma[2,1]/Sigma[1,1]
dnorm(x2, muc, sqrt(Sigmac))
}
df = expand.grid(x1grid, x2grid) %>%
rename(x1 = 1, x2 = 2) %>%
mutate(density = condMVN(x1, x2, mu, Sigma))
plt +
geom_abline(aes(intercept = mu[2] - Sigma[2,1]/Sigma[1,1] * mu[1],
slope = Sigma[2,1]/Sigma[1,1]), color = "orange", size = 1.5) +
geom_path(df, mapping = aes(x1 + density, x2, group = x1), color = "purple", size = 1) +
geom_abline(aes(intercept = mu[2] - axes[2,1]/axes[1,1] * mu[1],
slope = axes[2,1]/axes[1,1]), color = "forestgreen", size = 1, linetype = "dashed") +
geom_abline(aes(intercept = mu[2] - axes[2,2]/axes[1,2] * mu[1],
slope = axes[2,2]/axes[1,2]), color = "forestgreen", size = 1, linetype = "dashed") +
coord_fixed()
Wishart distribution withn
= number of matrices,
df
\(= n\),
Sigma
\(=\Sigma\).
df = 5
nrep = 3
Slist = rWishart(n = nrep, df = df, Sigma = Sigma)
apply(Slist, 3, heatmap)
## [[1]]
##
## [[2]]
##
## [[3]]
Generate a sample using random covariance matrices. Compare the results.
Xlist = apply(Slist, 3, function(S) rmvnorm(500, mean = c(0,0), sigma = S/df), simplify = F)
Xs = do.call(rbind, Xlist)
colnames(Xs) = c("x1", "x2")
df = data.frame(Xs, replicate = as.factor(rep(1:nrep, rep(500, nrep))))
ggplot(data = df, aes(x1, x2, color = replicate, group = replicate)) +
geom_point(alpha = 0.3) +
stat_ellipse(level = 0.95, size = 1.5, type = "norm")
What if we increase df
?
df = 100
Slist = rWishart(n = nrep, df = df, Sigma = Sigma)
apply(Slist, 3, heatmap)
## [[1]]
##
## [[2]]
##
## [[3]]
When increasing degrees of freedom, random Wishart matrices become more similar. As a result, the generated data look more similar as well.
Xlist = apply(Slist, 3, function(S) rmvnorm(500, mean = c(0,0), sigma = S/df), simplify = F)
Xs = do.call(rbind, Xlist)
colnames(Xs) = c("x1", "x2")
df = data.frame(Xs, replicate = as.factor(rep(1:nrep, rep(500, nrep))))
ggplot(data = df, aes(x1, x2, color = replicate, group = replicate)) +
geom_point(alpha = 0.3) +
stat_ellipse(level = 0.95, size = 1.5, type = "norm")