jeffreyhanson · August 27, 2015 17:34
diff --git a/splines_stan.R b/splines_stan.R
 #### Initialization
 # set global pars
 rm(list=ls())
 set.seed(500)

 # set user params
 n=500
 spline_df=5

 # load deps
 library(rstan)
 library(magrittr)
 library(plyr)
 library(dplyr)
 library(ggplot2)
 library(mgcv)
 library(splines)
 options(mc.cores=4)
 rstan_options(auto_write=TRUE)

 # define functions
 test=function(){source('F:/mainFS/PHD/PAPERS/2/scripts/models/stan_spline.R')}

 # define model
 model_code =  "
 	data {
 		int<lower=1> n_train; // number of training observations
 		int<lower=1> n_predict; // number of observations to predict
 		int<lower=1> d; // number of parameters
 		
 		real y_train[n_train]; // response variable training data
 		
 		matrix[n_train,d] X_train; // model matrix for training observations 
 		matrix[n_predict,d] X_predict; // model matrix for test observations 
 	}
 	
 	parameters {
 		vector[d] beta;
 		real<lower=0> sigma;
 	}
 	
 	model {
 		vector[d] mu;
 		
 		// priors
 		beta ~ normal(0,10);
 		sigma ~ cauchy(0, 2.5);
 		
 		// likelihood
 		y_train ~ normal(X_train * beta, sigma);
 	}
 	
 	generated quantities {
 		vector[n_predict] y_predict;
 		y_predict <- X_predict * beta;
 		for (i in 1:n_predict)
 			y_predict[i] <- normal_rng(y_predict[i], sigma);
 	}

 "


 #### Preliminary processing
 # load data
 raw_data = gamSim(1,n=500,dist="normal",scale=1)

 # make data.frame with predicted values
 predict_data=data.frame(
 	y=0,
 	x2=seq(
 		min(raw_data$x2) - (0.25*abs(diff(range(raw_data$x2)))),
 		max(raw_data$x2) + (0.25*abs(diff(range(raw_data$x2)))),
 		length.out=100
 	)
 )

 # calculate knot positions
 knot_positions = stats::quantile(
 	raw_data$x2,
 	seq.int(0, 1, length.out = (spline_df - 1L) + 2L)[-c(1L, (spline_df - 1L) + 2L)]
 )

 # model formula
 model_formula=y ~ ns(x2, knots=knot_positions, Boundary.knots=range(predict_data$x2))


 # make model matrices
 matrix_train=model.matrix(
 	model_formula,
 	raw_data
 )

 matrix_predict=model.matrix(
 	model_formula,
 	predict_data
 )

 # prepare data for stan
 model_data=list(
 	d=ncol(matrix_train),
 	y_train=raw_data$y,
 	n_train=nrow(matrix_train),
 	X_train=matrix_train,
 	n_predict=nrow(matrix_predict),
 	X_predict=matrix_predict
 )

 #### Main processing
 # run model
 model_results=stan(
 	model_code=model_code,
 	data=model_data,
 	chains=4,
 	iter=2000,
 	thin=1
 )

 print(model_results, pars=c('beta','sigma'))

 # calculate predictions
 predict_data$y=colMeans(rstan::extract(model_results, pars='y_predict')[[1]])
 predict_data$upper=apply(rstan::extract(model_results, pars='y_predict')[[1]],2,quantile,c(0.025))
 predict_data$lower=apply(rstan::extract(model_results, pars='y_predict')[[1]],2,quantile,c(0.975))

 #### Exports
 # plot results
 p1=ggplot() +
 	geom_point(aes(x=x2, y=y), data=raw_data) +
 	geom_ribbon(
 		aes(ymax=upper, ymin=lower, x=x2),
 		alpha=0.2,
 		data=predict_data
 	) +
 	geom_line(
 		aes(x=x2, y=y),
 		colour='blue',
 		data=predict_data
 	) + 
 	xlab('Predictor variable') +
 	ylab('Reponse variable') +
 	ggtitle('Fixed knot natural spline fit using Stan')
 print(p1)
	#### Initialization
	# set global pars
	rm(list=ls())
	set.seed(500)

	# set user params
	n=500
	spline_df=5

	# load deps
	library(rstan)
	library(magrittr)
	library(plyr)
	library(dplyr)
	library(ggplot2)
	library(mgcv)
	library(splines)
	options(mc.cores=4)
	rstan_options(auto_write=TRUE)

	# define functions
	test=function(){source('F:/mainFS/PHD/PAPERS/2/scripts/models/stan_spline.R')}

	# define model
	model_code = "
	data {
	int<lower=1> n_train; // number of training observations
	int<lower=1> n_predict; // number of observations to predict
	int<lower=1> d; // number of parameters

	real y_train[n_train]; // response variable training data

	matrix[n_train,d] X_train; // model matrix for training observations
	matrix[n_predict,d] X_predict; // model matrix for test observations
	}

	parameters {
	vector[d] beta;
	real<lower=0> sigma;
	}

	model {
	vector[d] mu;

	// priors
	beta ~ normal(0,10);
	sigma ~ cauchy(0, 2.5);

	// likelihood
	y_train ~ normal(X_train * beta, sigma);
	}

	generated quantities {
	vector[n_predict] y_predict;
	y_predict <- X_predict * beta;
	for (i in 1:n_predict)
	y_predict[i] <- normal_rng(y_predict[i], sigma);
	}

	"


	#### Preliminary processing
	# load data
	raw_data = gamSim(1,n=500,dist="normal",scale=1)

	# make data.frame with predicted values
	predict_data=data.frame(
	y=0,
	x2=seq(
	min(raw_data$x2) - (0.25*abs(diff(range(raw_data$x2)))),
	max(raw_data$x2) + (0.25*abs(diff(range(raw_data$x2)))),
	length.out=100
	)
	)

	# calculate knot positions
	knot_positions = stats::quantile(
	raw_data$x2,
	seq.int(0, 1, length.out = (spline_df - 1L) + 2L)[-c(1L, (spline_df - 1L) + 2L)]
	)

	# model formula
	model_formula=y ~ ns(x2, knots=knot_positions, Boundary.knots=range(predict_data$x2))


	# make model matrices
	matrix_train=model.matrix(
	model_formula,
	raw_data
	)

	matrix_predict=model.matrix(
	model_formula,
	predict_data
	)

	# prepare data for stan
	model_data=list(
	d=ncol(matrix_train),
	y_train=raw_data$y,
	n_train=nrow(matrix_train),
	X_train=matrix_train,
	n_predict=nrow(matrix_predict),
	X_predict=matrix_predict
	)

	#### Main processing
	# run model
	model_results=stan(
	model_code=model_code,
	data=model_data,
	chains=4,
	iter=2000,
	thin=1
	)

	print(model_results, pars=c('beta','sigma'))

	# calculate predictions
	predict_data$y=colMeans(rstan::extract(model_results, pars='y_predict')[[1]])
	predict_data$upper=apply(rstan::extract(model_results, pars='y_predict')[[1]],2,quantile,c(0.025))
	predict_data$lower=apply(rstan::extract(model_results, pars='y_predict')[[1]],2,quantile,c(0.975))

	#### Exports
	# plot results
	p1=ggplot() +
	geom_point(aes(x=x2, y=y), data=raw_data) +
	geom_ribbon(
	aes(ymax=upper, ymin=lower, x=x2),
	alpha=0.2,
	data=predict_data
	) +
	geom_line(
	aes(x=x2, y=y),
	colour='blue',
	data=predict_data
	) +
	xlab('Predictor variable') +
	ylab('Reponse variable') +
	ggtitle('Fixed knot natural spline fit using Stan')
	print(p1)