Helper functions

library(ggplot2)
library(dplyr)
library(viridis)
library(ggfortify)

barplot = function(values){
  n = length(values)
  df = data.frame(value = values,  index = 1:n)
  ggplot(df, aes(index, value, fill = value)) + 
    geom_bar(color = "black", stat = "identity")+
    scale_fill_gradient2(low="#619CFF", mid="white", high="#F8766D")+
    theme_bw()+
    theme(legend.position = "none")
}

heatmap = function(A){
  n = nrow(A)
  p = ncol(A)  
  df = data.frame(value = c(A),  i = 1:n, j = rep(1:p, rep(n, p)))
  ggplot(df, aes(j, i, fill = value)) + 
    geom_tile()+
    scale_fill_viridis()+
    #scale_fill_gradient(low="grey", high="black")+
    scale_y_reverse()+
    theme_void()
}

Fnorm = function(A) return(sqrt(sum(A^2)))

Visualizing low-rank matrices

n = 50
p = 30
r = 1
A = matrix(runif(n*r), n, r)
B = matrix(runif(p*r), p, r)
X = A %*% t(B)
heatmap(X)

r = 2
A = matrix(runif(n*r), n, r)
B = matrix(runif(p*r), p, r)
X = A %*% t(B)
heatmap(X)

r = 10
A = matrix(runif(n*r), n, r)
B = matrix(runif(p*r), p, r)
X = A %*% t(B)
heatmap(X)

r = 30
A = matrix(runif(n*r), n, r)
B = matrix(runif(p*r), p, r)
X = A %*% t(B)
heatmap(X)

Low-rank approximation

library(tiff)
library(rasterImage)
LRA = function(X, r){
  SVD = svd(X)
  Xhat = SVD$u[,1:r] %*% diag(SVD$d[1:r],r) %*% t(SVD$v[,1:r])
  return(Xhat)
}

Image compression

Load and image and check how rank impacts the resulting image.

X = readTIFF("img/baboon.tiff")[,,1]
X = scale(X, center = T, scale = F)
dim(X)
## [1] 512 512
ve = (svd(X)$d)^2/sum((svd(X)$d)^2)
barplot(cumsum(ve)[1:50])+
  xlab("principal component")+
  ylab("cumulative variance explained")

heatmap(X)

heatmap(LRA(X, 1))

heatmap(LRA(X, 5))

heatmap(LRA(X, 10))

heatmap(LRA(X, 50))

De-noising of an image

Create a low-rank pattern.

set.seed(3)
library(splines)
n = 100
df = 15
H = ns(1:n, df = df)
Theta = matrix(rnorm(df * 3), df, 3)
A = H %*% Theta
X = A %*% t(A)
heatmap(X)

ve = (svd(X)$d)^2/sum((svd(X)$d)^2)
barplot(ve[1:10])+
  xlab("principal component")+
  ylab("variance explained")

Add noise and try de-noising varying the rank of approximation.

E = matrix(rnorm(n*n, 0, 0.2), n, n)
Xnoise = X+E
heatmap(Xnoise)

ve = (svd(Xnoise)$d)^2/sum((svd(Xnoise)$d)^2)
barplot(ve[1:10])+
  xlab("principal component")+
  ylab("variance explained")

heatmap(LRA(Xnoise, 1))

heatmap(LRA(Xnoise, 3))

heatmap(LRA(Xnoise, 20))

Increase the noise and check the performance.

E = matrix(rnorm(n*n, 0, 0.5), n, n)
Xnoise = X+E
heatmap(Xnoise)

ve = (svd(Xnoise)$d)^2/sum((svd(Xnoise)$d)^2)
barplot(ve[1:10])+
  xlab("principal component")+
  ylab("variance explained")

heatmap(LRA(Xnoise, 1))

heatmap(LRA(Xnoise, 3))

heatmap(LRA(Xnoise, 20))

Soft rank approximation

nucnorm = function(X){
  d = svd(X)$d
  return(sum(d))
}
SRA = function(X, lambda){
  SVD = svd(X)
  d = SVD$d
  dstar = pmax(d - lambda, 0)
  return(SVD$u %*% diag(dstar) %*% t(SVD$v))
}

Let’s return to baboons.

X = readTIFF("img/baboon.tiff")[,,1]
X = scale(X, center = T, scale = F)
heatmap(X)

n = nrow(X)
p = ncol(X)

Check the distribution of singular values after applying the soft-threshold.

singvals = c()
for(lambda in c(0.01, 0.1, 1, 2, 5, 10)){
  Xhat = SRA(X, lambda)
  singvals = rbind(singvals, data.frame(lambda, dstar = svd(Xhat)$d, index = 1:p))
}
ggplot(singvals %>% mutate(lambda = as.factor(lambda)))+
  geom_bar(aes(x = index, y = dstar, fill = lambda), stat = "identity",  alpha = 0.5)+
  facet_grid(lambda~., labeller = label_both)+
  theme(legend.position = "none")+
  ylab("signular values of the approximation")

Check that the nuclear norm decreases.

singvals %>% 
  group_by(lambda) %>%
  summarise(norm = nucnorm(dstar)) %>%
ggplot(aes(lambda, norm))+
  geom_point()+
  geom_line()+
  ylab("nuclear norm")+
  xlab(expression(lambda))

The results resemble the ones obtained by low-rank matrix approximation.

heatmap(SRA(X, 1))

heatmap(SRA(X, 5))

heatmap(SRA(X, 10))