Created
December 10, 2010 12:33
Example code to split and plot a data.frame
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Make sure the RColorBrewer package (colour palettes used for the plots
### below) is available: install it on first use, then attach it.
### requireNamespace() only checks availability; the unconditional library()
### call afterwards stops with an error if the package still cannot be loaded.
if (!requireNamespace("RColorBrewer", quietly = TRUE)) {
  install.packages("RColorBrewer")
}
library(RColorBrewer)
### Generate a random data set: 100 values drawn from a normal distribution
### (mean 100, sd 25), each labelled "Type1" or "Type2" with equal probability
data <- data.frame(
  names = ifelse(runif(n = 100) >= 0.5, "Type2", "Type1"),
  data = rnorm(100, mean = 100, sd = 25)
)
### Use the aggregate function to split and get the mean of the data
aggregate(data$data, list(data$names), mean)
### Use the split and vapply functions to do the same thing
### (vapply is like sapply, but guarantees one numeric value per group)
s <- split(data$data, data$names)
vapply(s, mean, numeric(1))
# Or the same thing in one line
vapply(split(data$data, data$names), mean, numeric(1))
### Below is a function which is just a group of commands used to stop
### you having to type the same code in again for another dataset
#' Compare two groups with a box plot, a t-test and overlaid density curves
#'
#' Draws two figures on the current graphics device: a box plot of
#' `data$data` by `data$names`, then the kernel density of each group on
#' shared axes with a legend. The second figure's title reports the t-test
#' p-value and whether it is at or below 0.05.
#'
#' @param data A data.frame with a grouping column `names` and a numeric
#'   column `data`. Rows with a missing value in either column are dropped.
#' @param cols Vector of colours, one per group; extra colours are ignored
#'   and a shorter vector is recycled.
#' @return The t-test p-value, invisibly.
plotData <- function(data, cols) {
  ### Drop incomplete rows: density() refuses missing values
  keep <- !is.na(data$data) & !is.na(data$names)
  values <- data$data[keep]
  groups <- factor(data$names[keep])

  ### t.test() can only compare two groups, so fail early with a clear
  ### message before anything has been drawn
  n_groups <- nlevels(groups)
  if (n_groups != 2L) {
    stop(
      "plotData() needs exactly two groups in data$names, found ", n_groups,
      call. = FALSE
    )
  }
  stopifnot(length(cols) > 0)
  ### One colour per group, shared by the box plot, the lines and the legend
  cols <- rep_len(cols, n_groups)

  ### Draw a box plot of the data
  boxplot(values ~ groups, col = cols, pch = 20, xlab = "Group", ylab = "Value")

  ### Run a t-test on the split data
  pval <- t.test(values ~ groups)$p.value
  ### Are they significantly different ?
  areSig <- if (pval <= 0.05) "Significant" else "Not Significant"

  ### Calculate the density of the data, after splitting
  dens <- lapply(split(values, groups), density)
  ### Draw an empty figure with the correct x and y limits of the data.
  ### The x axis spans every density curve (so negative values are not cut
  ### off) and type = "n" sets up the axes without plotting any points.
  x_range <- range(vapply(dens, function(d) range(d$x), numeric(2)))
  y_max <- max(vapply(dens, function(d) max(d$y), numeric(1)))
  plot(x_range, c(0, y_max), type = "n", xlab = "Value", ylab = "Density")
  ### Draw the density plots for each data type
  for (i in seq_along(dens)) {
    lines(dens[[i]], col = cols[i], lwd = 3)
  }
  ### Add a legend
  legend("topleft", legend = names(dens), col = cols, lwd = 4)
  ### Add a title with the p-value and whether it is significant or not
  title(paste("P-value=", format.pval(pval), areSig))

  invisible(pval)
}
### Draw figures in a 2 x 2 grid, keeping the previous graphics settings so
### they can be put back once all four panels have been drawn
old_par <- par(mfrow = c(2, 2))
### Run the plotData function on the data object
plotData(data, cols = brewer.pal(8, "Dark2"))
### Make a new version of the data object, which should be significantly
### different, as the two groups have different means (100 vs 50)
data <- data.frame(
  names = rep(c("Type3", "Type4"), each = 50),
  data = c(rnorm(50, mean = 100, sd = 20), rnorm(50, mean = 50, sd = 10))
)
### Plot the new version of the data
plotData(data, cols = brewer.pal(8, "Set1"))
### Restore the graphics layout that was in place before this script ran
par(old_par)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment