Last active
March 20, 2019 11:13
-
-
Save theredpea/ce5840664b0fb3985b67df654964f76a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//EXIT SCRIPT;
//Rob Wunderlich shows IntervalMatch for a binning technique:
//https://community.qlik.com/message/858949#858949
//https://community.qlikview.com/thread/72476
//https://community.qlik.com/blogs/qlikviewdesignblog/2016/01/15/autogenerate-your-data
//https://community.qlik.com/blogs/qlikviewdesignblog/2014/08/13/recipe-for-a-histogram
//Expression variables. SET stores the right-hand side as literal text (nothing is
//evaluated during the reload), so each of these holds an expression string.
//
//For the reference lines in the "histograms";
//was once NumTrials * BinWidth, but that's not valid since adding
//(NOTE(review): the original sentence stops here; presumably "since adding the Level
// selection", which makes the visible ID count vary -- confirm.)
SET UniformExpectedDensity = COUNT(DISTINCT ID) * BinWidth;
//Will use on the "conditional" sheets.
//Same condition as the CondID field that the Trials load computes at reload time.
SET InsideCircleId = IF(DIST<1, ID);
//ONLY CHANGE if you know the actual limit; I think this is a *fixed* amount at which
//Qlik decides to switch from dot to density
SET NumPointsLimit = 1000;
//BinWidth is the width of one bin; Range is the upper end of the 0..Range axis;
//NumIntervals is the number of bins generated for that axis.
//LET evaluates its right-hand side during the reload, so only the LAST active
//assignment of BinWidth has any effect. The superseded attempts are kept below,
//commented out, as a record of what was tried (re-enable one by moving it last).
//
//LET BinWidth = 1/SQRT(NumPointsLimit); // 0.0316227766016838
//The odd is not cleanly divisible into 1 an integer-number of times; 31.62277
//So let's go down to the next BinWidth
//Ah but this still produces an odd number : 0.032258064516129
//even though there are an integer-number of these buckets spread throughout
//It must be 0.05 or some multiple of this, otherwise it leaves "halvsies buckets" at either end
//LET BinWidth = (1/FLOOR(SQRT(NumPointsLimit))); // FLOOR(SQRT(NumPointsLimit)) === 31;
//Still too wide
//LET BinWidth = 1/33;
//Works, so the value is somewhere between 33 and 40 squared?
//LET BinWidth = 1/40;
//This gives a very even distribution, and the "tartan" effect is not as strong
//But filtering NumTrials is slow
//LET BinWidth = 1/100;
//Effective value: this is the only active assignment.
LET BinWidth = 1/36;
//But the bar charts at bottom actually compress you more?
//LET BinWidth = 0.05;
//Therefore settling on a final BinWidth of:
//No, none of these work -- moreover it pushes my NumIntervals to produce more than the NumPointsLimit,
//so the Scatter plot doesn't work, it looks like a funny gridded density chart --
// LET BinWidth = 0.025; //5;
// LET BinWidth = 0.05;
//Too wide: 0.05;
//
SET Range = 1;
//AUTOGENERATE needs a whole number of rows, but Range/BinWidth is a quotient of
//floating-point values and is not guaranteed to come out exactly integral, so round it.
//Choose a BinWidth that divides Range evenly; otherwise the rounded bin count will
//not tile 0..Range exactly.
LET NumIntervals = Round(Range/BinWidth);
//Levels: one row per sample size ("Level"). The loop further down generates one
//batch of IDs per row, and the LAST row is later read back as NumTrials,
//so keep the rows in ascending order.
Levels:
LOAD * INLINE [
Level
100
1000
10000
100000
1000000]; //1M
//Row count of Levels; used as the loop bound when building Level_Lookup.
LET NumLevels = NoOfRows('Levels');
//Build Level_Lookup: for every Level in Levels, generate the IDs 1..Level tagged with
//that Level. Every pass loads the same two fields (Level, ID) under the same label,
//so the passes accumulate into one Level_Lookup table.
//
//Could also just loop through a "literal array"
//http://help.qlik.com/en-US/qlikview/12.1/Subsystems/Client/Content/Scripting/ScriptControlStatements/For%20Each.htm
// for each a in 1,3,7,'xyz'
//https://community.qlik.com/message/1272152#1272152
//Only gonna subtract "1" in the right-inclusive-range of the FOR loop: (Hopefully this only evaluates once)
//Must use 0 index (Peek row numbers start at 0)
FOR l = 0 to $(NumLevels)-1
  LET vLevel = Peek('Level', $(l), 'Levels');
  //Using RecNo() deliberately; remember its meaning:
  //ID marks the source row ID (alphabetically R`e`c comes before R`o`w)
  //1) Must be consistent with the Trials table's `ID` column
  //2) RowNo() here would keep counting across passes, up to
  //   100 + 1,000 + 10,000 + 100,000 + 1,000,000 = 1,111,100,
  //   i.e. more IDs than can join with the Trials table (only 1,000,000 rows); so must use RecNo()
  //2a) Fine because doubling up on an ID of "2" does not cause double-counting of the number;
  //2b) Indeed, if they had a broad Level picked, it would make no difference to select (i.e. filter on) a more granular Level
  Level_Lookup:
  LOAD
    $(vLevel) as Level,
    RecNo() as ID
  AUTOGENERATE $(vLevel);
  //Next level...
NEXT l;
//NumTrials is read from the LAST row of the Levels table (Peek with row -1). That is
//the largest Level only because the inline table is listed in ascending order.
//PREFERABLY greater than NumPointsLimit else the "Density" charts don't make as much sense?
//TODO: Test
LET NumTrials = PEEK('Level', -1, 'Levels');
//Using preceding loads to "do the science" of a simulated experiment. The three LOADs
//below form one stack; the bottom one runs first and each one feeds the LOAD above it:
//  bottom: draw independent random Y and X locations in 0..Range (a "Uniform
//          Distribution") and number the points with ID
//  middle: distance from (X, Y) to the origin of the fictional square (Pythagorean theorem)
//  top:    Cond* fields carry the value only when DIST<1 and are null otherwise
Trials:
LOAD *,
  IF(DIST<1, X) as CondX,
  IF(DIST<1, Y) as CondY,
  IF(DIST<1, ID) as CondID;
LOAD
  *,
  SQRT(X*X + Y*Y) as DIST;
LOAD
  RAND()*$(Range) as Y,
  RAND()*$(Range) as X,
  RECNO() as ID
//Dollar-sign expansion, consistent with the other AUTOGENERATE statements in this script.
AUTOGENERATE $(NumTrials);
//Bins: one row per interval of width BinWidth. Row n (RECNO() starting at 1) spans
//Start = BinWidth*(n-1) to Stop = BinWidth*n, and Bin is the midpoint of that span.
//NOTE(review): each Stop equals the next row's Start; IntervalMatch treats intervals as
//closed, so a value landing exactly on a shared edge would match both bins -- confirm
//this is acceptable for RAND()-generated values.
Bins:
LOAD
  //The whole idea of BIN is to give you explicit dimensions to use;
  //Dimensions that are actually kinda continuous, between 0 and 1.
  $(BinWidth) * (RECNO()-1) + $(BinWidth)/2 as Bin,
  //RECNO() would not be continuous, definitely not bounded between 0 and 1
  //RECNO() as Bin,
  $(BinWidth) * (RECNO()-1) as Start,
  $(BinWidth) * (RECNO()) as Stop
AUTOGENERATE ($(NumIntervals));
//Need two separate Match tables to conduct an IntervalMatch,
//That's right; can't just substitute these LOAD statements in place, for example;
//
//XBins and YBins are copies of Bins with the midpoint exposed under an axis-specific
//name (XBin / YBin).
XBins: LOAD *, Bin as XBin RESIDENT Bins;
YBins: LOAD *, Bin as YBin RESIDENT Bins;
//IntervalMatch is the crux of how load-statement binning works...
//Match every X value to the Start..Stop interval containing it and join the result
//onto XBins, so that XBins gains the X field.
INTERVALMATCH(X)
LEFT JOIN(XBins)
LOAD Start, Stop RESIDENT Bins;
//Same again for Y, joined onto YBins.
INTERVALMATCH(Y)
LEFT JOIN(YBins)
LOAD Start, Stop RESIDENT Bins;
//Drop fields else X and Y will be connected and we'll get cyclic relationships.
//DROP FIELD removes these fields from every table that has them, which also empties Bins.
DROP FIELD Start, Stop, Bin;
//Clocked at 21 seconds to load this data
//Then at 23 seconds
//18 seconds
//17 seconds
//14 seconds
//19 seconds
//13 seconds; I notice a DoSave() call in the WS frames automatically at the end of load progress,
//which also happens when I edit this script: notice the asterisk (dirty flag) in <title>; on Ctrl+S it will DoSave()
//TODO: Research and implement other binning techniques, including the new feature in Qlik's June 2017 release | |
//https://community.qlik.com/thread/158914 | |
//TODO: Make a visual explanation of how the class function works | |
//https://community.qlik.com/thread/87214 | |
//Two main developments
//1) Using CondID instead of IF(DIST<1, ID); the former is computed pre-script (at reload), the latter post-script (in the chart). Hoping the pre-script tradeoff = performance gains
//2) Measured the distance between the timestamp of the response to GetHyperCubeBinnedData and my own "clocking" as soon as I noticed the adaptive scatterplot had finished drawing;
//timestamp:
//22:20:49.700 something
//clock:
//new Date()
//22:20:56.830 Thu Aug 17 2017 22:20:57 GMT-0600 (Mountain Daylight Time)
//7 seconds just to draw it!!!
// width: 552px; | |
// height: 200px; | |
// transform: rotate3d(0, 0, 1, 90deg) translateX(40%) translateY(50%) scaleX(-1); | |
//this portion was added to the load script (in a separate, locked sheet), when I experimented with bucketing in Data Manager: | |
//https://gist.github.com/theredpea/254e02400f0d9d101511fcd80da67966 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment