jaymon0703 · October 12, 2017 21:59
diff --git a/random_strategy_builder.R b/random_strategy_builder.R
 require(blotter)

 # Remove portfolio and account data if run previously
 try(rm("portfolio.txnsim_rnorm_port","account.txnsim_rnorm_acct",pos=.blotter), silent = TRUE)

 # load the example data
 currency("USD")
 stock("GSPC",currency="USD",multiplier=1)
 getSymbols('^GSPC', src='yahoo', index.class=c("POSIXt","POSIXct"),from='1998-01-01')

 # Initialize the Portfolio
 initPortf("txnsim_rnorm_port",symbols="GSPC",initDate="1998-01-02")
 initAcct("txnsim_rnorm_acct",portfolios="txnsim_rnorm_port",initDate="1998-01-02", initEq=10000)

 # Store strategy calendar duration
 calendardur <- nrow(GSPC)
 calendardur
 targetdur <- calendardur # for now...TODO - add levels which will possibly take targetdur over calendardur
 # Randomise time in market
 # First flat periods (qty = 0, so no need to randomise these)
 flatdur <- round(runif(1, 0, calendardur*0.3),0)
 flatdur
 flatdur_mean <- 90 # choose a flat duration mean that makes sense to your random use case
 flatdur_stddev <- 20 # choose a flat duration std dev that makes sense to your random use case
 flatrows <- round(flatdur/rnorm(n = 1, mean = flatdur_mean, sd = flatdur_stddev))
 flatdur_vec <- round(rnorm(n = flatrows, mean = flatdur_mean, sd = flatdur_stddev),0) # compute vector of elements for flatdur, using fudgefactor to ensure extends at least beyond flatdur
 flatdur_vec
 sum(flatdur_vec)
 avgdur <- sum(flatdur_vec)/flatrows

 flatqty <- replicate(n = flatrows, 0) # no need to randomise qty

 # Now long periods (adding randomisation for position sizes)
 lsratio <- 0.5
 longdur <- round((targetdur - flatdur) * lsratio,0)
 longdur
 longdur_mean <- 45 # choose a flat duration mean that makes sense to your random use case
 longdur_stddev <- 15 # choose a flat duration std dev that makes sense to your random use case
 longrows <- round(longdur/rnorm(n = 1, mean = longdur_mean, sd = longdur_stddev))
 longdur_vec <- round(rnorm(n = longrows, mean = longdur_mean, sd = longdur_stddev),0) # compute vector of elements for flatdur, using fudgefactor to ensure extends at least beyond flatdur
 longdur_vec
 sum(longdur_vec)
 avgdur <- sum(longdur_vec)/longrows

 longqty <- round(rnorm(n = longrows, mean = 100, sd = 10), 0)
 longqty

 # Now short periods (adding randomisation for position sizes)
 lsratio <- lsratio
 shortdur <- targetdur - flatdur - longdur
 shortdur
 shortdur_mean <- 45 # choose a flat duration mean that makes sense to your random use case
 shortdur_stddev <- 15 # choose a flat duration std dev that makes sense to your random use case
 shortrows <- round(shortdur/rnorm(n = 1, mean = shortdur_mean, sd = shortdur_stddev))
 shortdur_vec <- round(rnorm(n = shortrows, mean = shortdur_mean, sd = shortdur_stddev),0) # compute vector of elements for flatdur, using fudgefactor to ensure extends at least beyond flatdur
 shortdur_vec
 sum(shortdur_vec)
 avgdur <- sum(shortdur_vec)/shortrows

 shortqty <- round(rnorm(n = shortrows, mean = -100, sd = 10), 0)
 shortqty

 # TODO: randomise levels

 subsample <- function(svector, targetdur, replacement=TRUE, ...,duration, qty) {
  #`trades` already exists in function scope
  
  dur <- 0 # initialize duration counter
  tdf <- data.frame() #initialize output data.frame
  nsamples <- round(length(svector) * fudgefactor, 0)
  while (dur < targetdur) {
    s <- sample(svector, nsamples, replace = replacement)
    # sdf <- data.frame(duration = trades[s,'duration'],
    #                   quantity = trades[s,'quantity'])
    sdf <- data.frame(duration = duration[s],
                      quantity = qty[s])
    if (is.null(tdf$duration)) {
      tdf <- sdf
    } else {
      tdf <- rbind(tdf, sdf)
    }
    dur <- sum(tdf$duration)
    nsamples <- round(((targetdur - dur) / avgdur) * fudgefactor, 0)
    nsamples <- ifelse(nsamples == 0, 1, nsamples)
    # print(nsamples) # for debugging
    dur
  }
  # could truncate data frame here to correct total duration
  # the row which takes our duration over the target
  xsrow <- last(which(cumsum(as.numeric(tdf$duration)) < (targetdur))) + 1
  if (xsrow == nrow(tdf)) {
    # the last row sampled takes us over targetdur
    adjxsrow <- sum(tdf$duration) - targetdur
    tdf$duration[xsrow] <- tdf$duration[xsrow] - adjxsrow
  } else if (xsrow < nrow(tdf)) {
    # the last iteration of the while loop added more than one row
    # which took our duration over the target
    tdf <- tdf[-seq.int(xsrow + 1, nrow(tdf), 1), ]
    adjxsrow <- sum(tdf$duration) - targetdur
    tdf$duration[xsrow] <- tdf$duration[xsrow] - adjxsrow
  }
  
  tdf  # return target data frame
 } # end subsample

 #sample long, short, flat periods
 if(flatdur > 0){
  a <- flatdur/sum(flatdur_vec)
  fudgefactor <- ceiling(a * 100) / 100
  flatdf  <- subsample(svector = flatrows, targetdur = flatdur, duration = flatdur_vec, qty = flatqty)
 } else {
  flatdf <- NULL
 }
 if(longdur > 0){ # ie. there are long round turn trades in the strategy
  a <- longdur/sum(longdur_vec)
  fudgefactor <- ceiling(a * 100) / 100
  longdf  <- subsample(svector = longrows, targetdur = longdur, duration = longdur_vec, qty = longqty)
 } else {
  longdf <- NULL
 }
 if(shortdur > 0){ # ie. there are short round turn trades in the strategy
  a <- shortdur/sum(shortdur_vec)
  fudgefactor <- ceiling(a * 100) / 100
  shortdf <- subsample(svector = shortrows, targetdur = shortdur, duration = shortdur_vec, qty = shortqty)
 } else {
  shortdf <- NULL
 }
 # Lines 129-162 are directly from txnsim...as we are not levelling for now, there is no need to concern ourselves with layers
 # #browser()
 # # make the first layer
 # # 1. start with flat periods
 # firstlayer <- flatdf
 # # 2. segment trades for first layer
 # targetlongdur <- structure(round((calendardur-flatdur)*lsratio),units='secs',class='difftime')
 # if(!is.null(longdf)){ # ie. there are long round turn trades in the strategy
 #   targetlongrow <- last(which(cumsum(as.numeric(longdf$duration))<targetlongdur))
 #   firstlayer    <- rbind(firstlayer,longdf[1:targetlongrow,])
 # } else {
 #   targetlongrow <- 0
 # }
 # # firstlayer    <- rbind(firstlayer,longdf[1:targetlongrow,])
 # if(!is.null(shortdf)){ # ie. there are short round turn trades in the strategy
 #   targetshortrow <- last( which( cumsum(as.numeric(shortdf$duration))<(calendardur-sum(firstlayer$duration)) ) )
 #   firstlayer     <- rbind(firstlayer,shortdf[1:targetshortrow,])
 # } else {
 #   targetshortrow <- 0
 # }
 # firstlayer    <- firstlayer[sample(nrow(firstlayer),replace=FALSE),]
 # # firstlayer should be just slightly longer than calendardur, we'll truncate later
 # 
 # tdf <- firstlayer # establish target data.frame
 # 
 # # build a vector of start times
 # start <- first(trades$start) + cumsum(as.numeric(tdf$duration))
 # # add the first start time back in
 # start <- c(first(trades$start), start)
 # # take off the last end time, since we won't put in a closing trade
 # start <- start[-length(start)]
 # # add start column to tdf
 # tdf$start <- start
 # # rearrange columns for consistency
 # tdf <- tdf[, c("start", "duration", "quantity")]

 # Since we now have flatdf, longdf and shortdf we are in a position to combine the dataframes
 # to make a single dataframe of durations and quantities for our random strategy which we will
 # add transaction based on. To start, we will randomly select a flat pertiod and then sample
 # the remaining rows. This is to mirror production backtests which generally require lead time
 # before rules are activated and positions opened.
 startrow <- sample(flatrows, size = 1)
 # idx <- c(startrow, as.numeric(row.names((flatdf[-startrow,]))))
 startdf <- flatdf[startrow,]
 flatdf <- flatdf[-startrow,]
 mergedf <- rbind(flatdf, longdf, shortdf)
 mergedf <- mergedf[order(sample(nrow(mergedf), size = nrow(mergedf))),]
 mergedf <- rbind(startdf, mergedf)
 mergedf

 # Add transactions
 # First get prices
 dargs <- list()
 if (!is.null(dargs$env)) {
  env <- dargs$env
 } else {
  env <- .GlobalEnv
 }
 if (!is.null(dargs$prefer)) {
  prefer <- dargs$prefer
 } else {
  prefer <- NULL
 }
 # prices <- get('GSPC')
 prices <- getPrice(get("GSPC", pos = env), prefer = prefer)[, 1]
 txns <- list()

 dur_cumsum <- cumsum(mergedf$duration)
 # Now for tranactions
 for (r in 1:nrow(mergedf)) {
  # opening trade
  open  <- data.frame(
    if(r == 1) {
      start = index(prices[dur_cumsum[r] - mergedf[r,1] + 1])
      } else {
        start = index(prices[dur_cumsum[r] - mergedf[r,1]])
        },
    TxnQty = mergedf[r, "quantity"],
    TxnPrice = as.numeric(prices[start])
    )
    colnames(open) <- c("start","TxnQty","TxnPrice")
  # closing trade
  close <-
    data.frame(
      start = index(prices[dur_cumsum[r]]),
      TxnQty = -1 * mergedf[r, "quantity"],
      TxnPrice = as.numeric(prices[dur_cumsum[r]])
    )
  txns[[r]] <- rbind(open, close)
 } # end loop over rows
 txns <- do.call(rbind, txns)
 txns <- xts(txns[, c("TxnQty", "TxnPrice")], order.by = txns[, 1])
 txns <- txns[which(txns$TxnQty != 0), ]
 txns

 # portname <- "random_trader"
 symbol <- "GSPC"
 # initPortf(portname, "GSPC")
 addTxns(Portfolio = "txnsim_rnorm_port",
        Symbol = symbol,
        TxnData = txns)
 updatePortf(Portfolio = "txnsim_rnorm_port")
 chart.Posn("random_trader", "GSPC")

 ex.txnsim <- function(Portfolio
                      ,n=10
                      ,replacement=FALSE
                      , tradeDef='increased.to.reduced'
                      , chart=FALSE
 )
 {
  out <- txnsim(Portfolio,n,replacement, tradeDef = tradeDef)
  if(isTRUE(chart)) {
    portnames <- blotter:::txnsim.portnames(Portfolio, replacement, n)
    for (i in 1:n){
      p<- portnames[i]
      symbols<-names(getPortfolio(p)$symbols)
      for(symbol in symbols) {
        dev.new()
        chart.Posn(p,symbol)
      }
    }
  }
  invisible(out)
 } # end ex.txnsim

 rnorm.wr <- ex.txnsim('txnsim_rnorm_port',1000, replacement = TRUE, chart = FALSE)
 plot(rnorm.wr)
 rnorm.wr$pvalues
 hist(rnorm.wr)
	require(blotter)

	# Remove portfolio and account data if run previously
	try(rm("portfolio.txnsim_rnorm_port","account.txnsim_rnorm_acct",pos=.blotter), silent = TRUE)

	# load the example data
	currency("USD")
	stock("GSPC",currency="USD",multiplier=1)
	getSymbols('^GSPC', src='yahoo', index.class=c("POSIXt","POSIXct"),from='1998-01-01')

	# Initialize the Portfolio
	initPortf("txnsim_rnorm_port",symbols="GSPC",initDate="1998-01-02")
	initAcct("txnsim_rnorm_acct",portfolios="txnsim_rnorm_port",initDate="1998-01-02", initEq=10000)

	# Store strategy calendar duration
	calendardur <- nrow(GSPC)
	calendardur
	targetdur <- calendardur # for now...TODO - add levels which will possibly take targetdur over calendardur
	# Randomise time in market
	# First flat periods (qty = 0, so no need to randomise these)
	flatdur <- round(runif(1, 0, calendardur*0.3),0)
	flatdur
	flatdur_mean <- 90 # choose a flat duration mean that makes sense to your random use case
	flatdur_stddev <- 20 # choose a flat duration std dev that makes sense to your random use case
	flatrows <- round(flatdur/rnorm(n = 1, mean = flatdur_mean, sd = flatdur_stddev))
	flatdur_vec <- round(rnorm(n = flatrows, mean = flatdur_mean, sd = flatdur_stddev),0) # compute vector of elements for flatdur, using fudgefactor to ensure extends at least beyond flatdur
	flatdur_vec
	sum(flatdur_vec)
	avgdur <- sum(flatdur_vec)/flatrows

	flatqty <- replicate(n = flatrows, 0) # no need to randomise qty

	# Now long periods (adding randomisation for position sizes)
	lsratio <- 0.5
	longdur <- round((targetdur - flatdur) * lsratio,0)
	longdur
	longdur_mean <- 45 # choose a flat duration mean that makes sense to your random use case
	longdur_stddev <- 15 # choose a flat duration std dev that makes sense to your random use case
	longrows <- round(longdur/rnorm(n = 1, mean = longdur_mean, sd = longdur_stddev))
	longdur_vec <- round(rnorm(n = longrows, mean = longdur_mean, sd = longdur_stddev),0) # compute vector of elements for flatdur, using fudgefactor to ensure extends at least beyond flatdur
	longdur_vec
	sum(longdur_vec)
	avgdur <- sum(longdur_vec)/longrows

	longqty <- round(rnorm(n = longrows, mean = 100, sd = 10), 0)
	longqty

	# Now short periods (adding randomisation for position sizes)
	lsratio <- lsratio
	shortdur <- targetdur - flatdur - longdur
	shortdur
	shortdur_mean <- 45 # choose a flat duration mean that makes sense to your random use case
	shortdur_stddev <- 15 # choose a flat duration std dev that makes sense to your random use case
	shortrows <- round(shortdur/rnorm(n = 1, mean = shortdur_mean, sd = shortdur_stddev))
	shortdur_vec <- round(rnorm(n = shortrows, mean = shortdur_mean, sd = shortdur_stddev),0) # compute vector of elements for flatdur, using fudgefactor to ensure extends at least beyond flatdur
	shortdur_vec
	sum(shortdur_vec)
	avgdur <- sum(shortdur_vec)/shortrows

	shortqty <- round(rnorm(n = shortrows, mean = -100, sd = 10), 0)
	shortqty

	# TODO: randomise levels

	subsample <- function(svector, targetdur, replacement=TRUE, ...,duration, qty) {
	#`trades` already exists in function scope

	dur <- 0 # initialize duration counter
	tdf <- data.frame() #initialize output data.frame
	nsamples <- round(length(svector) * fudgefactor, 0)
	while (dur < targetdur) {
	s <- sample(svector, nsamples, replace = replacement)
	# sdf <- data.frame(duration = trades[s,'duration'],
	# quantity = trades[s,'quantity'])
	sdf <- data.frame(duration = duration[s],
	quantity = qty[s])
	if (is.null(tdf$duration)) {
	tdf <- sdf
	} else {
	tdf <- rbind(tdf, sdf)
	}
	dur <- sum(tdf$duration)
	nsamples <- round(((targetdur - dur) / avgdur) * fudgefactor, 0)
	nsamples <- ifelse(nsamples == 0, 1, nsamples)
	# print(nsamples) # for debugging
	dur
	}
	# could truncate data frame here to correct total duration
	# the row which takes our duration over the target
	xsrow <- last(which(cumsum(as.numeric(tdf$duration)) < (targetdur))) + 1
	if (xsrow == nrow(tdf)) {
	# the last row sampled takes us over targetdur
	adjxsrow <- sum(tdf$duration) - targetdur
	tdf$duration[xsrow] <- tdf$duration[xsrow] - adjxsrow
	} else if (xsrow < nrow(tdf)) {
	# the last iteration of the while loop added more than one row
	# which took our duration over the target
	tdf <- tdf[-seq.int(xsrow + 1, nrow(tdf), 1), ]
	adjxsrow <- sum(tdf$duration) - targetdur
	tdf$duration[xsrow] <- tdf$duration[xsrow] - adjxsrow
	}

	tdf # return target data frame
	} # end subsample

	#sample long, short, flat periods
	if(flatdur > 0){
	a <- flatdur/sum(flatdur_vec)
	fudgefactor <- ceiling(a * 100) / 100
	flatdf <- subsample(svector = flatrows, targetdur = flatdur, duration = flatdur_vec, qty = flatqty)
	} else {
	flatdf <- NULL
	}
	if(longdur > 0){ # ie. there are long round turn trades in the strategy
	a <- longdur/sum(longdur_vec)
	fudgefactor <- ceiling(a * 100) / 100
	longdf <- subsample(svector = longrows, targetdur = longdur, duration = longdur_vec, qty = longqty)
	} else {
	longdf <- NULL
	}
	if(shortdur > 0){ # ie. there are short round turn trades in the strategy
	a <- shortdur/sum(shortdur_vec)
	fudgefactor <- ceiling(a * 100) / 100
	shortdf <- subsample(svector = shortrows, targetdur = shortdur, duration = shortdur_vec, qty = shortqty)
	} else {
	shortdf <- NULL
	}
	# Lines 129-162 are directly from txnsim...as we are not levelling for now, there is no need to concern ourselves with layers
	# #browser()
	# # make the first layer
	# # 1. start with flat periods
	# firstlayer <- flatdf
	# # 2. segment trades for first layer
	# targetlongdur <- structure(round((calendardur-flatdur)*lsratio),units='secs',class='difftime')
	# if(!is.null(longdf)){ # ie. there are long round turn trades in the strategy
	# targetlongrow <- last(which(cumsum(as.numeric(longdf$duration))<targetlongdur))
	# firstlayer <- rbind(firstlayer,longdf[1:targetlongrow,])
	# } else {
	# targetlongrow <- 0
	# }
	# # firstlayer <- rbind(firstlayer,longdf[1:targetlongrow,])
	# if(!is.null(shortdf)){ # ie. there are short round turn trades in the strategy
	# targetshortrow <- last( which( cumsum(as.numeric(shortdf$duration))<(calendardur-sum(firstlayer$duration)) ) )
	# firstlayer <- rbind(firstlayer,shortdf[1:targetshortrow,])
	# } else {
	# targetshortrow <- 0
	# }
	# firstlayer <- firstlayer[sample(nrow(firstlayer),replace=FALSE),]
	# # firstlayer should be just slightly longer than calendardur, we'll truncate later
	#
	# tdf <- firstlayer # establish target data.frame
	#
	# # build a vector of start times
	# start <- first(trades$start) + cumsum(as.numeric(tdf$duration))
	# # add the first start time back in
	# start <- c(first(trades$start), start)
	# # take off the last end time, since we won't put in a closing trade
	# start <- start[-length(start)]
	# # add start column to tdf
	# tdf$start <- start
	# # rearrange columns for consistency
	# tdf <- tdf[, c("start", "duration", "quantity")]

	# Since we now have flatdf, longdf and shortdf we are in a position to combine the dataframes
	# to make a single dataframe of durations and quantities for our random strategy which we will
	# add transaction based on. To start, we will randomly select a flat pertiod and then sample
	# the remaining rows. This is to mirror production backtests which generally require lead time
	# before rules are activated and positions opened.
	startrow <- sample(flatrows, size = 1)
	# idx <- c(startrow, as.numeric(row.names((flatdf[-startrow,]))))
	startdf <- flatdf[startrow,]
	flatdf <- flatdf[-startrow,]
	mergedf <- rbind(flatdf, longdf, shortdf)
	mergedf <- mergedf[order(sample(nrow(mergedf), size = nrow(mergedf))),]
	mergedf <- rbind(startdf, mergedf)
	mergedf

	# Add transactions
	# First get prices
	dargs <- list()
	if (!is.null(dargs$env)) {
	env <- dargs$env
	} else {
	env <- .GlobalEnv
	}
	if (!is.null(dargs$prefer)) {
	prefer <- dargs$prefer
	} else {
	prefer <- NULL
	}
	# prices <- get('GSPC')
	prices <- getPrice(get("GSPC", pos = env), prefer = prefer)[, 1]
	txns <- list()

	dur_cumsum <- cumsum(mergedf$duration)
	# Now for tranactions
	for (r in 1:nrow(mergedf)) {
	# opening trade
	open <- data.frame(
	if(r == 1) {
	start = index(prices[dur_cumsum[r] - mergedf[r,1] + 1])
	} else {
	start = index(prices[dur_cumsum[r] - mergedf[r,1]])
	},
	TxnQty = mergedf[r, "quantity"],
	TxnPrice = as.numeric(prices[start])
	)
	colnames(open) <- c("start","TxnQty","TxnPrice")
	# closing trade
	close <-
	data.frame(
	start = index(prices[dur_cumsum[r]]),
	TxnQty = -1 * mergedf[r, "quantity"],
	TxnPrice = as.numeric(prices[dur_cumsum[r]])
	)
	txns[[r]] <- rbind(open, close)
	} # end loop over rows
	txns <- do.call(rbind, txns)
	txns <- xts(txns[, c("TxnQty", "TxnPrice")], order.by = txns[, 1])
	txns <- txns[which(txns$TxnQty != 0), ]
	txns

	# portname <- "random_trader"
	symbol <- "GSPC"
	# initPortf(portname, "GSPC")
	addTxns(Portfolio = "txnsim_rnorm_port",
	Symbol = symbol,
	TxnData = txns)
	updatePortf(Portfolio = "txnsim_rnorm_port")
	chart.Posn("random_trader", "GSPC")

	ex.txnsim <- function(Portfolio
	,n=10
	,replacement=FALSE
	, tradeDef='increased.to.reduced'
	, chart=FALSE
	)
	{
	out <- txnsim(Portfolio,n,replacement, tradeDef = tradeDef)
	if(isTRUE(chart)) {
	portnames <- blotter:::txnsim.portnames(Portfolio, replacement, n)
	for (i in 1:n){
	p<- portnames[i]
	symbols<-names(getPortfolio(p)$symbols)
	for(symbol in symbols) {
	dev.new()
	chart.Posn(p,symbol)
	}
	}
	}
	invisible(out)
	} # end ex.txnsim

	rnorm.wr <- ex.txnsim('txnsim_rnorm_port',1000, replacement = TRUE, chart = FALSE)
	plot(rnorm.wr)
	rnorm.wr$pvalues
	hist(rnorm.wr)