theredpea · March 20, 2019 11:13
diff --git a/monte_carlo.qvs b/monte_carlo.qvs
 //EXIT SCRIPT;

 //Rob Wunderlich shows IntervalMatch for a binning technique:
 //https://community.qlik.com/message/858949#858949
 //https://community.qlikview.com/thread/72476
 //https://community.qlik.com/blogs/qlikviewdesignblog/2016/01/15/autogenerate-your-data
 //https://community.qlik.com/blogs/qlikviewdesignblog/2014/08/13/recipe-for-a-histogram
 
 //Expression text for the reference lines in the "histograms".
 //SET stores the text as-is; it is only evaluated where it is later dollar-expanded.
 //Was once NumTrials * BinWidth, but that's no longer valid (the original note breaks off here;
 //presumably because selections can reduce the number of IDs -- TODO confirm), hence COUNT(DISTINCT ID).
 SET UniformExpectedDensity = COUNT(DISTINCT ID) * BinWidth;
 
 //Expression text for the "conditional" sheets: the ID when DIST < 1, otherwise nothing.
 SET InsideCircleId = IF(DIST<1, ID);
 

 //ONLY CHANGE if you know the actual limit; I think this is a *fixed* amount at which Qlik decides to switch from dot to density
 SET NumPointsLimit = 1000;


 //BinWidth candidates that were tried, in order. Each used to be a live LET that the next one overwrote,
 //so only the last assignment ever took effect; the earlier ones are kept here as notes instead of being executed.
 //  1/SQRT(NumPointsLimit)        = 0.0316227766016838 : does not divide 1 an integer number of times (31.62277)
 //  1/FLOOR(SQRT(NumPointsLimit)) = 1/31 = 0.032258064516129 : an integer number of buckets, but still an odd width;
 //                                  it must be 0.05 or some multiple of this, otherwise it leaves "halvsies buckets" at either end
 //  1/33  : still too wide
 //  1/40  : works, so the value is somewhere between 33 and 40 squared?
 //  1/100 : very even distribution, and the "tartan" effect is not as strong, but filtering NumTrials is slow
 //  0.05  : too wide; the bar charts at the bottom actually compress you more?
 //  0.025 : none of these work -- moreover it pushes NumIntervals to produce more than NumPointsLimit,
 //          so the Scatter plot doesn't work, it looks like a funny gridded density chart
 //Settled on:
 LET BinWidth = 1/36;

 SET Range = 1;
 //Round to a whole number: Range/BinWidth is a floating-point quotient and is used as the AUTOGENERATE row count for Bins.
 LET NumIntervals = Round(Range/BinWidth);

 //The sample sizes ("levels") that can be picked; one row per level.
 Levels:
 LOAD * INLINE [
 Level
 100
 1000
 10000
 100000
 1000000]; //1M

 LET NumLevels = NoOfRows('Levels');


 //Alternative: loop through a "literal array" instead of a table
 //http://help.qlik.com/en-US/qlikview/12.1/Subsystems/Client/Content/Scripting/ScriptControlStatements/For%20Each.htm
 // for each a in 1,3,7,'xyz'
 //https://community.qlik.com/message/1272152#1272152
 //Peek() rows are 0-indexed and the FOR range is right-inclusive, hence 0 to NumLevels-1
 //(hopefully the upper bound is only evaluated once)
 FOR l = 0 TO $(NumLevels) - 1

   //Size of the level handled in this pass
   LET vLevel = Peek('Level', $(l), 'Levels');

   //One row per (Level, ID) pair, with ID running 1..vLevel.
   //RecNo() is deliberate (alphabetically R`e`c comes before R`o`w; it numbers the source records):
   //1) ID must be consistent with the Trials table's `ID` column
   //2) RowNo() would keep counting across the passes and yield 10e5+10e4+10e3+10e2+10e1 *more* IDs
   //   than could join with the Trials table (only 10e6 rows)
   //2a) Repeating an ID such as "2" under several levels does not double-count that trial
   //2b) Indeed, with a broad Level picked, additionally selecting (i.e. filtering on) a more granular Level makes no difference
   Level_Lookup:
   LOAD
     RecNo()   as ID,
     $(vLevel) as Level
   AUTOGENERATE $(vLevel);

 //Next level...
 NEXT l;
    
 //NumTrials is the largest number in the Levels table...
 //PREFERABLY greater than NumPointsLimit else the "Density" charts don't make as much sense?
 //TODO: Test
 //Take the real maximum instead of the last row, so reordering the inline Levels list
 //cannot leave Level_Lookup with IDs that have no matching trial.
 NumTrials_Tmp:
 LOAD Max(Level) as NumTrials_Tmp_Max RESIDENT Levels;
 LET NumTrials = PEEK('NumTrials_Tmp_Max', 0, 'NumTrials_Tmp');
 DROP TABLE NumTrials_Tmp;

 //The simulated experiment, written as a three-stage preceding load (the stages run bottom-up):
 //  bottom: two independent RAND() draws scaled by Range give Y and X (a "Uniform Distribution"); ID numbers the trial
 //  middle: DIST is the distance from (X, Y) to the origin of the fictional square (Pythagorean theorem)
 //  top:    CondX / CondY / CondID carry the value only for trials with DIST < 1
 Trials:
 LOAD
     *,
     IF(DIST < 1, X)  as CondX,
     IF(DIST < 1, Y)  as CondY,
     IF(DIST < 1, ID) as CondID
 ;
 LOAD
     *,
     SQRT(X*X + Y*Y) as DIST
 ;
 LOAD
     RAND() * $(Range) as Y,
     RAND() * $(Range) as X,
     RECNO()           as ID
 AUTOGENERATE NumTrials;
    
    
 //One row per bin: its [Start, Stop] edges and its midpoint Bin.
 //The whole idea of Bin is to give explicit dimension values that are still kinda continuous, between 0 and 1.
 //(RECNO() itself as Bin would be neither continuous nor bounded between 0 and 1.)
 Bins:
 LOAD
     Start + $(BinWidth)/2 as Bin,   //midpoint of the bin
     Start,
     Stop
 ;
 LOAD
     $(BinWidth) * (RECNO()-1) as Start,
     $(BinWidth) * (RECNO())   as Stop
 AUTOGENERATE ($(NumIntervals));
    
 //Need two separate Match tables to conduct an IntervalMatch, one for X and one for Y.
 //That's right; can't just substitute these LOAD statements in place, for example.
 //Each copy keeps Bin, Start and Stop from Bins (Start/Stop are the join key below) and adds its own alias of Bin.
 XBins: LOAD *, Bin as XBin RESIDENT Bins;
 YBins: LOAD *, Bin as YBin RESIDENT Bins;

 //IntervalMatch is the crux of how load-statement binning works:
 //it pairs the already-loaded X values with the Start/Stop intervals read from Bins,
 //and LEFT JOIN attaches those X values to the XBins rows that have the same Start and Stop.
 INTERVALMATCH(X)
 LEFT JOIN(XBins)
 LOAD Start,  Stop RESIDENT Bins;


 //Same again for Y, joined into YBins.
 INTERVALMATCH(Y)
 LEFT JOIN(YBins)
 LOAD Start,  Stop RESIDENT Bins;

 //Drop fields else X and Y will be connected and we'll get cyclic relationships.
 //Of the fields loaded above, this leaves XBins with XBin and X, and YBins with YBin and Y.
 DROP FIELD Start, Stop, Bin;

 //Clocked at 21 seconds to load this data
 //Then at 23 seconds
 //18 seconds
 //17 seconds
 //14
 //19
 //13 seconds; I notice a DoSave() call in the WS (WebSocket) frames automatically at the end of load progress
 //The same happens when I edit this script: notice the asterisk (dirty flag) in <title>; on Ctrl+S it will call DoSave()

 //TODO:  Research and implement other binning techniques, including the new feature in Qlik's June 2017  release
 //https://community.qlik.com/thread/158914

 //TODO: Make a visual explanation of how the class function  works
 //https://community.qlik.com/thread/87214

 //Two main developments
 //1) Using CondID instead of IF(Length<1, ID); the former is computed pre-script (during the load), the latter post-script (in the chart). Hoping the pre-script tradeoff = performance gains
 //2) Measured the distance between the timestamp of the response to GetHyperCubeBinnedData and my own "clocking" as soon as I noticed the adaptive scatterplot had finished drawing:
 //timestamp:
 //22:20:49.700 something
 //clock:
 //new Date()
 //22:20:56.830 Thu Aug 17 2017 22:20:57 GMT-0600 (Mountain Daylight Time)
 //7 seconds just to draw it!!!

 //     width: 552px;
 //     height: 200px;
 //     transform: rotate3d(0, 0, 1, 90deg) translateX(40%) translateY(50%) scaleX(-1);

 //this portion was added to the load script (in a separate, locked sheet), when I experimented with bucketing in Data Manager:
 //https://gist.github.com/theredpea/254e02400f0d9d101511fcd80da67966
	//EXIT SCRIPT;

	//Rob Wunderlich shows IntervalMatch for a binning technique:
	//https://community.qlik.com/message/858949#858949
	//https://community.qlikview.com/thread/72476
	//https://community.qlik.com/blogs/qlikviewdesignblog/2016/01/15/autogenerate-your-data
	//https://community.qlik.com/blogs/qlikviewdesignblog/2014/08/13/recipe-for-a-histogram

	//Expression text for the reference lines in the "histograms".
	//SET stores the text as-is; it is only evaluated where it is later dollar-expanded.
	//Was once NumTrials * BinWidth, but that's no longer valid (the original note breaks off here;
	//presumably because selections can reduce the number of IDs -- TODO confirm), hence COUNT(DISTINCT ID).
	SET UniformExpectedDensity = COUNT(DISTINCT ID) * BinWidth;

	//Expression text for the "conditional" sheets: the ID when DIST < 1, otherwise nothing.
	SET InsideCircleId = IF(DIST<1, ID);


	//ONLY CHANGE if you know the actual limit; I think this is a fixed amount at which Qlik decides to switch from dot to density
	SET NumPointsLimit = 1000;


	//BinWidth candidates that were tried, in order. Each used to be a live LET that the next one overwrote,
	//so only the last assignment ever took effect; the earlier ones are kept here as notes instead of being executed.
	//  1/SQRT(NumPointsLimit)        = 0.0316227766016838 : does not divide 1 an integer number of times (31.62277)
	//  1/FLOOR(SQRT(NumPointsLimit)) = 1/31 = 0.032258064516129 : an integer number of buckets, but still an odd width;
	//                                  it must be 0.05 or some multiple of this, otherwise it leaves "halvsies buckets" at either end
	//  1/33  : still too wide
	//  1/40  : works, so the value is somewhere between 33 and 40 squared?
	//  1/100 : very even distribution, and the "tartan" effect is not as strong, but filtering NumTrials is slow
	//  0.05  : too wide; the bar charts at the bottom actually compress you more?
	//  0.025 : none of these work -- moreover it pushes NumIntervals to produce more than NumPointsLimit,
	//          so the Scatter plot doesn't work, it looks like a funny gridded density chart
	//Settled on:
	LET BinWidth = 1/36;

	SET Range = 1;
	//Round to a whole number: Range/BinWidth is a floating-point quotient and is used as the AUTOGENERATE row count for Bins.
	LET NumIntervals = Round(Range/BinWidth);

	//The sample sizes ("levels") that can be picked; one row per level.
	Levels:
	LOAD * INLINE [
	Level
	100
	1000
	10000
	100000
	1000000]; //1M

	LET NumLevels = NoOfRows('Levels');


	//Alternative: loop through a "literal array" instead of a table
	//http://help.qlik.com/en-US/qlikview/12.1/Subsystems/Client/Content/Scripting/ScriptControlStatements/For%20Each.htm
	// for each a in 1,3,7,'xyz'
	//https://community.qlik.com/message/1272152#1272152
	//Peek() rows are 0-indexed and the FOR range is right-inclusive, hence 0 to NumLevels-1
	//(hopefully the upper bound is only evaluated once)
	FOR l = 0 TO $(NumLevels) - 1

		//Size of the level handled in this pass
		LET vLevel = Peek('Level', $(l), 'Levels');

		//One row per (Level, ID) pair, with ID running 1..vLevel.
		//RecNo() is deliberate (alphabetically R`e`c comes before R`o`w; it numbers the source records):
		//1) ID must be consistent with the Trials table's `ID` column
		//2) RowNo() would keep counting across the passes and yield 10e5+10e4+10e3+10e2+10e1 more IDs
		//   than could join with the Trials table (only 10e6 rows)
		//2a) Repeating an ID such as "2" under several levels does not double-count that trial
		//2b) Indeed, with a broad Level picked, additionally selecting (i.e. filtering on) a more granular Level makes no difference
		Level_Lookup:
		LOAD
			RecNo()   as ID,
			$(vLevel) as Level
		AUTOGENERATE $(vLevel);

	//Next level...
	NEXT l;

	//NumTrials is the largest number in the Levels table...
	//PREFERABLY greater than NumPointsLimit else the "Density" charts don't make as much sense?
	//TODO: Test
	//Take the real maximum instead of the last row, so reordering the inline Levels list
	//cannot leave Level_Lookup with IDs that have no matching trial.
	NumTrials_Tmp:
	LOAD Max(Level) as NumTrials_Tmp_Max RESIDENT Levels;
	LET NumTrials = PEEK('NumTrials_Tmp_Max', 0, 'NumTrials_Tmp');
	DROP TABLE NumTrials_Tmp;

	//Using preceding load to "do the science" of a simulated experiment (the stages run bottom-up):
	//  bottom: two independent RAND() draws scaled by Range give Y and X (a "Uniform Distribution"); ID numbers the trial
	//  middle: DIST is the distance from (X, Y) to the origin of the fictional square (Pythagorean theorem)
	//  top:    CondX / CondY / CondID carry the value only for trials with DIST < 1
	Trials:
	LOAD *,
	IF(DIST<1, X) as CondX,
	IF(DIST<1, Y) as CondY,
	IF(DIST<1, ID) as CondID;

	LOAD
	*,
	//Was SQRT(XX + YY): the multiplication signs had been lost, leaving references to
	//fields XX and YY that this load never creates.
	SQRT(X*X + Y*Y) as DIST;
	LOAD
	RAND()*$(Range) as Y,
	RAND()*$(Range) as X,
	RECNO() as ID
	AUTOGENERATE NumTrials;


	//One row per bin: its [Start, Stop] edges and its midpoint Bin.
	//The whole idea of Bin is to give explicit dimension values that are still kinda continuous, between 0 and 1.
	//(RECNO() itself as Bin would be neither continuous nor bounded between 0 and 1.)
	Bins:
	LOAD
		Start + $(BinWidth)/2 as Bin,   //midpoint of the bin
		Start,
		Stop
	;
	LOAD
		$(BinWidth) * (RECNO()-1) as Start,
		$(BinWidth) * (RECNO())   as Stop
	AUTOGENERATE ($(NumIntervals));

	//Need two separate Match tables to conduct an IntervalMatch, one for X and one for Y.
	//That's right; can't just substitute these LOAD statements in place, for example.
	//Each copy keeps Bin, Start and Stop from Bins (Start/Stop are the join key below) and adds its own alias of Bin.
	XBins: LOAD *, Bin as XBin RESIDENT Bins;
	YBins: LOAD *, Bin as YBin RESIDENT Bins;

	//IntervalMatch is the crux of how load-statement binning works:
	//it pairs the already-loaded X values with the Start/Stop intervals read from Bins,
	//and LEFT JOIN attaches those X values to the XBins rows that have the same Start and Stop.
	INTERVALMATCH(X)
	LEFT JOIN(XBins)
	LOAD Start, Stop RESIDENT Bins;


	//Same again for Y, joined into YBins.
	INTERVALMATCH(Y)
	LEFT JOIN(YBins)
	LOAD Start, Stop RESIDENT Bins;

	//Drop fields else X and Y will be connected and we'll get cyclic relationships.
	//Of the fields loaded above, this leaves XBins with XBin and X, and YBins with YBin and Y.
	DROP FIELD Start, Stop, Bin;

	//Clocked at 21 seconds to load this data
	//Then at 23 seconds
	//18 seconds
	//17 seconds
	//14
	//19
	//13 seconds; I notice a DoSave() call in the WS (WebSocket) frames automatically at the end of load progress
	//The same happens when I edit this script: notice the asterisk (dirty flag) in <title>; on Ctrl+S it will call DoSave()

	//TODO: Research and implement other binning techniques, including the new feature in Qlik's June 2017 release
	//https://community.qlik.com/thread/158914

	//TODO: Make a visual explanation of how the class function works
	//https://community.qlik.com/thread/87214

	//Two main developments
	//1) Using CondID instead of IF(Length<1, ID); the former is computed pre-script (during the load), the latter post-script (in the chart). Hoping the pre-script tradeoff = performance gains
	//2) Measured the distance between the timestamp of the response to GetHyperCubeBinnedData and my own "clocking" as soon as I noticed the adaptive scatterplot had finished drawing:
	//timestamp:
	//22:20:49.700 something
	//clock:
	//new Date()
	//22:20:56.830 Thu Aug 17 2017 22:20:57 GMT-0600 (Mountain Daylight Time)
	//7 seconds just to draw it!!!

	// width: 552px;
	// height: 200px;
	// transform: rotate3d(0, 0, 1, 90deg) translateX(40%) translateY(50%) scaleX(-1);

	//this portion was added to the load script (in a separate, locked sheet), when I experimented with bucketing in Data Manager:
	//https://gist.github.com/theredpea/254e02400f0d9d101511fcd80da67966