smacarthur · December 10, 2010 12:33
diff --git a/splitAndPlot.R b/splitAndPlot.R
 if (!require(RColorBrewer)){
  install.packages("RColorBrewer")
  library(RColorBrewer)
 }

 ### Simulate 100 observations: each row is labelled Type1 or Type2 from a
 ### uniform draw (>= 0.5 gives Type2) and gets a Normal(mean 100, sd 25) value
 data <- data.frame(
   names = ifelse(runif(100) >= 0.5, "Type2", "Type1"),
   data = rnorm(100, mean = 100, sd = 25)
 )

 ### Group means, approach 1: aggregate() splits by group and applies mean
 aggregate(x = data$data, by = list(data$names), FUN = mean)

 ### Group means, approach 2: split() the values by group, then sapply() mean
 ### over the resulting list
 s <- split(x = data$data, f = list(data$names))
 sapply(X = s, FUN = mean)
 # The same computation without the intermediate variable
 sapply(X = split(x = data$data, f = list(data$names)), FUN = mean)

 ### Below is a function which is just a group of commands used to stop                                                                                                          
 ### you having to type the same code in again for another dataset                                                                                                               

 ### plotData: compare two groups of values graphically.
 ###
 ### Draws two figures on the current graphics device:
 ###   1. a box plot of data$data split by data$names
 ###   2. the kernel density curve of each group, overlaid, with a legend and
 ###      a title giving the t-test p-value and whether it is <= 0.05
 ###
 ### Arguments:
 ###   data - data.frame with a grouping column `names` and a numeric column
 ###          `data`; t.test() needs exactly two groups
 ###   cols - vector of colours; element i is used for the i-th group
 ###
 ### Called for its plotting side effects.
 plotData <- function(data,cols){
   ### Re-factor the grouping column (on the local copy only) so that a
   ### character column is plotted as groups and unused factor levels do not
   ### produce empty groups that density() cannot handle
   data$names <- factor(data$names)
   ### Draw a box plot of the data
   plot(data$data ~ data$names, col=cols, pch=20)
   ### Run a t-test on the split data
   pval <- t.test(data$data ~ data$names)$p.value
   ### Are they significantly different ? (index 1 = no, index 2 = yes)
   areSig <- c("Not Significant","Significant")[as.numeric(pval<=0.05)+1]
   ### Calculate the density of the data, after splitting
   dens <- lapply(split(data$data, data$names), density)
   ### Axis limits covering every density curve; 0 is always included on the
   ### x axis, and the axis now also extends below 0 when the data does
   xRange <- range(0, unlist(lapply(dens, function(d) d$x)))
   yMax <- max(vapply(dens, function(d) max(d$y), numeric(1)))
   ### Draw an empty figure (type="n" plots no point) with those limits
   plot(1, type="n", xlim=xRange, ylim=c(0, yMax), xlab="data", ylab="Density")
   ### Draw the density plots for each data type
   for (i in seq_along(dens)) {
     lines(dens[[i]], col=cols[i], lwd=3)
   }
   ### Add a legend, using the same colour per group as the curves
   legend("topleft", legend=names(dens), col=cols[seq_along(dens)], lwd=4)
   ### Add a title with the p-value and whether it is significant or not
   title(paste("P-value=", format.pval(pval), areSig))
 }

 ### Lay the figures out on a 2 x 2 grid; each plotData call draws two figures
 par(mfrow = c(2, 2))
 ### Plot the random Type1/Type2 data set using the Dark2 palette
 plotData(data, cols = brewer.pal(n = 8, name = "Dark2"))

 ### Replace the data object with two groups drawn from different
 ### distributions (means 100 and 50), which should test as significant
 data <- data.frame(
   names = rep(c("Type3", "Type4"), each = 50),
   data = c(rnorm(n = 50, mean = 100, sd = 20), rnorm(n = 50, mean = 50, sd = 10))
 )
 ### Plot the new data set using the Set1 palette
 plotData(data, cols = brewer.pal(n = 8, name = "Set1"))
	if (!require(RColorBrewer)){
	install.packages("RColorBrewer")
	library(RColorBrewer)
	}

	### Simulate 100 observations: each row is labelled Type1 or Type2 from a
	### uniform draw (>= 0.5 gives Type2) and gets a Normal(mean 100, sd 25) value
	data <- data.frame(
	  names = ifelse(runif(100) >= 0.5, "Type2", "Type1"),
	  data = rnorm(100, mean = 100, sd = 25)
	)

	### Group means, approach 1: aggregate() splits by group and applies mean
	aggregate(x = data$data, by = list(data$names), FUN = mean)

	### Group means, approach 2: split() the values by group, then sapply() mean
	### over the resulting list
	s <- split(x = data$data, f = list(data$names))
	sapply(X = s, FUN = mean)
	# The same computation without the intermediate variable
	sapply(X = split(x = data$data, f = list(data$names)), FUN = mean)

	### Below is a function which is just a group of commands used to stop
	### you having to type the same code in again for another dataset

	### plotData: compare two groups of values graphically.
	###
	### Draws two figures on the current graphics device:
	###   1. a box plot of data$data split by data$names
	###   2. the kernel density curve of each group, overlaid, with a legend and
	###      a title giving the t-test p-value and whether it is <= 0.05
	###
	### Arguments:
	###   data - data.frame with a grouping column `names` and a numeric column
	###          `data`; t.test() needs exactly two groups
	###   cols - vector of colours; element i is used for the i-th group
	###
	### Called for its plotting side effects.
	plotData <- function(data,cols){
	  ### Re-factor the grouping column (on the local copy only) so that a
	  ### character column is plotted as groups and unused factor levels do not
	  ### produce empty groups that density() cannot handle
	  data$names <- factor(data$names)
	  ### Draw a box plot of the data
	  plot(data$data ~ data$names, col=cols, pch=20)
	  ### Run a t-test on the split data
	  pval <- t.test(data$data ~ data$names)$p.value
	  ### Are they significantly different ? (index 1 = no, index 2 = yes)
	  areSig <- c("Not Significant","Significant")[as.numeric(pval<=0.05)+1]
	  ### Calculate the density of the data, after splitting
	  dens <- lapply(split(data$data, data$names), density)
	  ### Axis limits covering every density curve; 0 is always included on the
	  ### x axis, and the axis now also extends below 0 when the data does
	  xRange <- range(0, unlist(lapply(dens, function(d) d$x)))
	  yMax <- max(vapply(dens, function(d) max(d$y), numeric(1)))
	  ### Draw an empty figure (type="n" plots no point) with those limits
	  plot(1, type="n", xlim=xRange, ylim=c(0, yMax), xlab="data", ylab="Density")
	  ### Draw the density plots for each data type
	  for (i in seq_along(dens)) {
	    lines(dens[[i]], col=cols[i], lwd=3)
	  }
	  ### Add a legend, using the same colour per group as the curves
	  legend("topleft", legend=names(dens), col=cols[seq_along(dens)], lwd=4)
	  ### Add a title with the p-value and whether it is significant or not
	  title(paste("P-value=", format.pval(pval), areSig))
	}

	### Lay the figures out on a 2 x 2 grid; each plotData call draws two figures
	par(mfrow = c(2, 2))
	### Plot the random Type1/Type2 data set using the Dark2 palette
	plotData(data, cols = brewer.pal(n = 8, name = "Dark2"))

	### Replace the data object with two groups drawn from different
	### distributions (means 100 and 50), which should test as significant
	data <- data.frame(
	  names = rep(c("Type3", "Type4"), each = 50),
	  data = c(rnorm(n = 50, mean = 100, sd = 20), rnorm(n = 50, mean = 50, sd = 10))
	)
	### Plot the new data set using the Set1 palette
	plotData(data, cols = brewer.pal(n = 8, name = "Set1"))