Created
October 26, 2011 01:45
Revisions
-
smc77 revised this gist
Nov 1, 2011 . 1 changed file with 5 additions and 5 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -8,11 +8,11 @@ sa.heart <- read.table("http://www-stat.stanford.edu/~tibs/ElemStatLearn/dataset # Pretty plot pairs(sa.heart[1:9],pch=21,bg=c("red","green")[factor(sa.heart$chd)]) # Apply logistic regression to South African heart data from ESL sa.heart.fit <- glm(chd ~ ., family = binomial, data=sa.heart) summary(sa.heart.fit) # Use stepwise logistic regression to reduce the dimensions library(MASS) sa.heart.step <- stepAIC(glm(chd ~ ., data=sa.heart)) summary(sa.heart.step) -
smc77 revised this gist
Oct 28, 2011 . 1 changed file with 0 additions and 70 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,4 +1,3 @@ # Plot the sigmoid function library(ggplot2) qplot(-10:10, 1/(1 + exp(-(-10:10))), geom="line", xlab="z", ylab="sigmoid function") @@ -9,75 +8,6 @@ sa.heart <- read.table("http://www-stat.stanford.edu/~tibs/ElemStatLearn/dataset # Pretty plot pairs(sa.heart[1:9],pch=21,bg=c("red","green")[factor(sa.heart$chd)]) # Use stepwise logistic regression to reduce the dimensions library(MASS) sa.heart.step <- stepAIC(sa.heart) -
smc77 revised this gist
Oct 28, 2011 . 1 changed file with 75 additions and 15 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,28 +1,88 @@ # Plot the sigmoid function library(ggplot2) qplot(-10:10, 1/(1 + exp(-(-10:10))), geom="line", xlab="z", ylab="sigmoid function") # Download South African heart disease data sa.heart <- read.table("http://www-stat.stanford.edu/~tibs/ElemStatLearn/datasets/SAheart.data", sep=",",head=T,row.names=1) # Pretty plot pairs(sa.heart[1:9],pch=21,bg=c("red","green")[factor(sa.heart$chd)]) num.iterations <- 1000 # Download South African heart disease data sa.heart <- read.table("http://www-stat.stanford.edu/~tibs/ElemStatLearn/datasets/SAheart.data", sep=",",head=T,row.names=1) x <- sa.heart[,c("age", "ldl")] y <- sa.heart$chd plot(x, pch=21, bg=c("red","green")[factor(y)]) # Function to standardize input values zscore <- function(x, mean.val=NA) { if(is.matrix(x)) return(apply(x, 2, zscore, mean.val=mean.val)) if(is.data.frame(x)) return(data.frame(apply(x, 2, zscore, mean.val=mean.val))) if(is.na(mean.val)) mean.val <- mean(x) sd.val <- sd(x) if(all(sd.val == 0)) return(x) # if all the values are the same (x - mean.val) / sd.val } # Standardize the features x.scaled <- zscore(x) # Gradient descent function grad <- function(x, y, theta) { gradient <- (1 / nrow(y)) * (t(x) %*% (1/(1 + exp(-x %*% t(theta))) - y)) return(t(gradient)) } gradient.descent <- function(x, y, alpha=0.1, num.iterations=500, threshold=1e-5, output.path=FALSE) { # Add x_0 = 1 as the first column m <- if(is.vector(x)) length(x) else nrow(x) if(is.vector(x) || (!all(x[,1] == 1))) x <- cbind(rep(1, m), x) if(is.vector(y)) y <- matrix(y) x <- apply(x, 2, as.numeric) num.features <- ncol(x) # Initialize the parameters theta <- matrix(rep(0, num.features), nrow=1) # Look at the values over each iteration theta.path <- theta for (i in 1:num.iterations) { theta <- theta - alpha * grad(x, y, theta) if(all(is.na(theta))) break theta.path <- rbind(theta.path, theta) if(i > 2) if(all(abs(theta - theta.path[i-1,]) < threshold)) break } if(output.path) return(theta.path) else return(theta.path[nrow(theta.path),]) } unscaled.theta <- gradient.descent(x=x, y=y, num.iterations=num.iterations, output.path=TRUE) scaled.theta <- gradient.descent(x=x.scaled, y=y, num.iterations=num.iterations, output.path=TRUE) summary(glm(chd ~ age + ldl, family = binomial, data=sa.heart)) qplot(1:(nrow(scaled.theta)), scaled.theta[,1], geom=c("line"), xlab="iteration", ylab="theta_1") qplot(1:(nrow(scaled.theta)), scaled.theta[,2], geom=c("line"), xlab="iteration", ylab="theta_2") # Look at output for various different alpha values vary.alpha <- lapply(c(1e-12, 1e-9, 1e-7, 1e-3, 0.1, 0.9), function(alpha) gradient.descent(x=x.scaled, y=y, alpha=alpha, num.iterations=num.iterations, output.path=TRUE)) par(mfrow = c(2, 3)) for (j in 1:6) { plot(vary.alpha[[j]][,2], ylab="area (alpha=1e-9)", xlab="iteration", type="l") } # Use stepwise logistic regression to reduce the dimensions library(MASS) sa.heart.step <- stepAIC(sa.heart) summary(sa.heart.step) # Apply logistic regression to South African heart data from ESL sa.heart.fit <- glm(chd ~ sbp + tobacco + ldl + famhist + obesity + alcohol + age , family = binomial, data=sa.heart) summary(sa.heart.fit) -
smc77 revised this gist
Oct 27, 2011 . 1 changed file with 18 additions and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -5,7 +5,24 @@ qplot(-10:10, 1/(1 + exp(-(-10:10))), geom="line", xlab="z", ylab="sigmoid funct sa.heart <- glm(chd ~ sbp + tobacco + ldl + famhist + obesity + alcohol + age , family = binomial , data=hr) summary(sa.heart) # Download South African heart disease data hr <- read.table("http://www-stat.stanford.edu/~tibs/ElemStatLearn/datasets/SAheart.data", sep=",",head=T,row.names=1) # Pretty plot pairs(hr[1:9],pch=21,bg=c("red","green")[factor(hr$chd)]) # Scale data and create a random train / test split n <- nrow(hr) p <- ncol(hr)-1 test.ratio <- .2 # ratio of test/train samples n.test <- ceiling(n*test.ratio) testi <- sample(1:n,n.test) traini <- setdiff(1:n,testi) data.train <- hr[traini,] data.test <- hr[testi,] # Use stepwise logistic regression to reduce the dimensions library(MASS) sa.heart.step <- stepAIC(sa.heart) summary(sa.heart.step) -
smc77 revised this gist
Oct 26, 2011 . 1 changed file with 4 additions and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,8 +1,11 @@ # Plot the sigmoid function qplot(-10:10, 1/(1 + exp(-(-10:10))), geom="line", xlab="z", ylab="sigmoid function") # Apply logistic regression to South African heart data from ESL sa.heart <- glm(chd ~ sbp + tobacco + ldl + famhist + obesity + alcohol + age , family = binomial , data=hr) summary(sa.heart) # Use stepwise logistic regression to reduce the dimensions library(MASS) sa.heart.step <- stepAIC(sa.heart) summary(sa.heart.step) -
smc77 revised this gist
Oct 26, 2011 . 1 changed file with 4 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -2,3 +2,7 @@ sa.heart <- glm(chd ~ sbp + tobacco + ldl + famhist + obesity + alcohol + age , family = binomial , data=hr) summary(sa.heart) library(MASS) sa.heart.step <- stepAIC(sa.heart) summary(sa.heart.step) -
smc77 created this gist
Oct 26, 2011 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,4 @@ # Apply logistic regression to South African heart data from ESL sa.heart <- glm(chd ~ sbp + tobacco + ldl + famhist + obesity + alcohol + age , family = binomial , data=hr) summary(sa.heart)