Skip to content

Instantly share code, notes, and snippets.

@DannyArends
Created July 10, 2024 19:01
Show Gist options
  • Save DannyArends/5fb5258d3d524c3f0bb271c6603197c2 to your computer and use it in GitHub Desktop.
Save DannyArends/5fb5258d3d524c3f0bb271c6603197c2 to your computer and use it in GitHub Desktop.
Code for the YouTube lecture "Principal Component Analysis from Scratch" (https://www.youtube.com/live/y9zS7Xm0iEU)
# Unit-Variance Scaling or Autoscaling
autoscale <- function(X) {
return(apply(X,2, function(x){ (x- mean(x)) / sd(x) }))
}
# Compute the covariance matrix
covariance <- function(X) {
cM <- matrix(NA, ncol(X), ncol(X), dimnames = list(colnames(X), colnames(X)))
for(x in colnames(X)) {
for(y in colnames(X)) {
cM[x,y] <- (sum((X[,x] - mean(X[,x]))*(X[,y] - mean(X[,y])))) / (nrow(X)-1)
}
}
return(cM)
}
# Compute eigen vectors through QR decomposition
eigenvectors <- function(X, n.iter = 100) {
pQ <- diag(1, ncol(X)); # Set pQ as the Identity matrix
for (i in 1:n.iter) {
d <- qr(X); # QR decomposition
Q <- qr.Q(d); # Reconstruct the Q matrix
pQ <- pQ %*% Q; # Update the product of Q
X <- qr.R(d) %*% Q; # Reconstruct X from the R matrix multiplied by Q
}
return(list("values" = diag(X), "vectors" = pQ))
}
data(iris)
values <- as.matrix(iris[,1:4])
labels <- as.factor(iris[,5])
# Compute Unit-Variance Scaling
std <- autoscale(values)
# Compute covariance matrix (from scratch)
cov <- covariance(std)
# Compute the eigen values and vectors
evs <- eigenvectors(cov)
# Compute (cumulative) variance explained
ve <- round((evs$values / sum(evs$values)) * 100, 1)
cve <- cumsum(ve)
plot(cve, main = "Cumulative variance explained", xlab = "Component", ylab = "%", t = "b")
# Compute the projection (P)
W <- evs$vectors
P <- std %*% W
colnames(P) <- paste0("PC", 1:ncol(P))
# Plot the computed principal components compared to the R default function
op <- par(mfrow = c(1,2))
plot(P[,c("PC1","PC2")], col = labels, main = "From Scratch", pch=19)
legend("bottomright", levels(labels), col = unique(labels), pch=19)
plot(prcomp(values, scale = TRUE)$x[,c("PC1","PC2")], col = labels, main ="Function prcomp()", pch=19)
legend("topright", levels(labels), col = unique(labels), pch=19)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment