3Nigma · August 29, 2015 13:56 · 3Nigma · Feb 12, 2014
diff --git a/gistfile1.matlab b/gistfile1.matlab
 function totalRewards = simulate_epsg_n_bandit(n, eps, gamecnt, rollcnt)
  # Simulate an epsilon-greedy agent on a randomly generated n-armed bandit.
  #
  # Inputs:
  #   n       - number of arms (actions) of each bandit
  #   eps     - exploration probability; a uniformly random arm is pulled with
  #             probability eps, otherwise the arm with the highest estimated
  #             value is pulled (note: this name shadows the builtin eps here)
  #   gamecnt - number of independent games (a fresh bandit is drawn for each)
  #   rollcnt - number of pulls per game
  #
  # Output:
  #   totalRewards - 1 x rollcnt row vector; element i is the SUM over all
  #                  games of the reward received on pull i. Divide by gamecnt
  #                  to obtain the average reward per step.
  #
  # Only core random generators (rand, randi, randn, randperm) are used, so the
  # statistics package does not have to be loaded.

  # accumulated reward per pull index, summed over all games
  totalRewards = zeros(1, rollcnt);

  for k = 1:gamecnt
    # generate the true action values [q_*(a)] from a normal distribution of mean 0 and variance 1
    q = randn(1, n);

    # reset the per-game sample-average bookkeeping [Q_t(a)]
    Qavg   = zeros(1, n);
    Qsum   = zeros(1, n);
    nPulls = zeros(1, n);

    for i = 1:rollcnt
      if (rand() <= 1 - eps)
        # exploitation step: choose uniformly at random among all arms that
        # share the maximal estimate, so ties (e.g. the all-zero estimates at
        # the start of a game) do not always resolve to the lowest index
        idsQstep = find(Qavg == max(Qavg));
        order = randperm(numel(idsQstep));
        iQstep = idsQstep(order(1));
      else
        # exploration step: any arm, uniformly at random
        iQstep = randi(n);
      endif

      # draw the noisy reward and integrate it into the knowledge base
      Rk = q(iQstep) + randn();
      totalRewards(i) = totalRewards(i) + Rk;
      Qsum(iQstep) = Qsum(iQstep) + Rk;
      nPulls(iQstep) = nPulls(iQstep) + 1;
      Qavg(iQstep) = Qsum(iQstep)/nPulls(iQstep);
    endfor
  endfor
 endfunction
	function totalRewards = simulate_epsg_n_bandit(n, eps, gamecnt, rollcnt)
	  # Simulate an epsilon-greedy agent on a randomly generated n-armed bandit.
	  #
	  # Inputs:
	  #   n       - number of arms (actions) of each bandit
	  #   eps     - exploration probability; a uniformly random arm is pulled with
	  #             probability eps, otherwise the arm with the highest estimated
	  #             value is pulled (note: this name shadows the builtin eps here)
	  #   gamecnt - number of independent games (a fresh bandit is drawn for each)
	  #   rollcnt - number of pulls per game
	  #
	  # Output:
	  #   totalRewards - 1 x rollcnt row vector; element i is the SUM over all
	  #                  games of the reward received on pull i. Divide by gamecnt
	  #                  to obtain the average reward per step.
	  #
	  # Only core random generators (rand, randi, randn, randperm) are used, so the
	  # statistics package does not have to be loaded.

	  # accumulated reward per pull index, summed over all games
	  totalRewards = zeros(1, rollcnt);

	  for k = 1:gamecnt
	    # generate the true action values [q_*(a)] from a normal distribution of mean 0 and variance 1
	    q = randn(1, n);

	    # reset the per-game sample-average bookkeeping [Q_t(a)]
	    Qavg   = zeros(1, n);
	    Qsum   = zeros(1, n);
	    nPulls = zeros(1, n);

	    for i = 1:rollcnt
	      if (rand() <= 1 - eps)
	        # exploitation step: choose uniformly at random among all arms that
	        # share the maximal estimate, so ties (e.g. the all-zero estimates at
	        # the start of a game) do not always resolve to the lowest index
	        idsQstep = find(Qavg == max(Qavg));
	        order = randperm(numel(idsQstep));
	        iQstep = idsQstep(order(1));
	      else
	        # exploration step: any arm, uniformly at random
	        iQstep = randi(n);
	      endif

	      # draw the noisy reward and integrate it into the knowledge base
	      Rk = q(iQstep) + randn();
	      totalRewards(i) = totalRewards(i) + Rk;
	      Qsum(iQstep) = Qsum(iQstep) + Rk;
	      nPulls(iQstep) = nPulls(iQstep) + 1;
	      Qavg(iQstep) = Qsum(iQstep)/nPulls(iQstep);
	    endfor
	  endfor
	endfunction