Skip to content

Instantly share code, notes, and snippets.

@osroca
Last active September 22, 2016 09:48

Revisions

  1. osroca revised this gist Sep 22, 2016. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion index.html
    Original file line number Diff line number Diff line change
    @@ -31,7 +31,7 @@

    </style>
    <body>
    <script src="http://d3js.org/d3.v3.min.js"></script>
    <script src="https://d3js.org/d3.v3.min.js"></script>
    <!--script src="http://localhost:1025/static/js/d3.v3.min.js"></script-->
    <script>

  2. osroca created this gist Nov 10, 2014.
    1 change: 1 addition & 0 deletions README.md
    Original file line number Diff line number Diff line change
    @@ -0,0 +1 @@
    A visualization of blood insulin from the UCI diabetes dataset. The distribution is stored with a streaming histogram. Brush to zoom. Click to zoom out.
    239 changes: 239 additions & 0 deletions diabetes-000004.json
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,239 @@
    {
    "anomaly_seed": "2c249dda00fbf54ab4cdd850532a584f286af5b6",
    "category": 0,
    "code": 200,
    "columns": 1,
    "constraints": false,
    "created": "2014-10-28T20:13:11.111000",
    "credits": 0.69940185546875,
    "credits_per_prediction": 0.0,
    "dataset": "dataset/5427f656ec65d11671000004",
    "dataset_field_types": {
    "categorical": 1,
    "datetime": 0,
    "numeric": 8,
    "preferred": 9,
    "text": 0,
    "total": 9
    },
    "dataset_status": true,
    "dataset_type": 0,
    "description": "",
    "excluded_fields": [],
    "fields_meta": {
    "count": 1,
    "limit": 1000,
    "offset": 0,
    "query_total": 1,
    "total": 9
    },
    "forest_size": 128,
    "id_fields": [],
    "input_fields": [
    "000000",
    "000001",
    "000002",
    "000003",
    "000004",
    "000005",
    "000006",
    "000007",
    "000008"
    ],
    "locale": "en-US",
    "max_columns": 9,
    "max_rows": 768,
    "model": {
    "fields": {
    "000004": {
    "column_number": 4,
    "datatype": "int16",
    "name": "insulin",
    "optype": "numeric",
    "order": 0,
    "preferred": true,
    "summary": {
    "bins": [
    [
    0,
    374
    ],
    [
    19.33333,
    9
    ],
    [
    43.4375,
    32
    ],
    [
    59.894739999999999,
    38
    ],
    [
    74.384619999999998,
    26
    ],
    [
    91.217389999999995,
    46
    ],
    [
    112.23256000000001,
    43
    ],
    [
    134.34884,
    43
    ],
    [
    160.17646999999999,
    34
    ],
    [
    184.35293999999999,
    34
    ],
    [
    208.57894999999999,
    19
    ],
    [
    232,
    11
    ],
    [
    251.40000000000001,
    5
    ],
    [
    272.69999999999999,
    10
    ],
    [
    288.5,
    6
    ],
    [
    304.66667000000001,
    3
    ],
    [
    324.75,
    8
    ],
    [
    338.5,
    2
    ],
    [
    368.33332999999999,
    3
    ],
    [
    393.66667000000001,
    3
    ],
    [
    415,
    1
    ],
    [
    440,
    1
    ],
    [
    465,
    1
    ],
    [
    479.39999999999998,
    5
    ],
    [
    495,
    2
    ],
    [
    510,
    1
    ],
    [
    542.66666999999995,
    3
    ],
    [
    579,
    1
    ],
    [
    600,
    1
    ],
    [
    680,
    1
    ],
    [
    744,
    1
    ],
    [
    846,
    1
    ]
    ],
    "maximum": 846,
    "mean": 79.799480000000003,
    "median": 30.5,
    "minimum": 0,
    "missing_count": 0,
    "population": 768,
    "splits": [
    0.18842999999999999,
    30.5,
    127.5
    ],
    "standard_deviation": 115.244,
    "sum": 61286,
    "sum_squares": 15077256,
    "variance": 13281.18008
    }
    }
    },
    "kind": "iforest",
    "mean_depth": 14.774407996894411
    },
    "name": "diabetes' dataset anomaly detector",
    "number_of_anomalyscores": 0,
    "number_of_batchanomalyscores": 0,
    "number_of_public_anomalyscores": 0,
    "out_of_bag": false,
    "price": 0.0,
    "private": true,
    "project": "project/542537ecec65d1fc17000279",
    "range": [
    1,
    768
    ],
    "replacement": false,
    "resource": "anomaly/544ff8d7ec65d102f200067d",
    "rows": 768,
    "sample_rate": 1.0,
    "sample_size": 483,
    "shared": false,
    "size": 26192,
    "source": "source/5427f64dec65d11671000000",
    "source_status": true,
    "status": {
    "code": 5,
    "elapsed": 2514,
    "message": "The anomaly detector has been created",
    "progress": 1.0
    },
    "subscription": true,
    "tags": [],
    "top_n": 10,
    "updated": "2014-10-28T20:13:23.050000",
    "white_box": false
    }
    301 changes: 301 additions & 0 deletions index.html
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,301 @@
    <!DOCTYPE html>
    <meta charset="utf-8">
    <style>

    /* Histogram rectangles; the script appends them with class "bar". */
    .bar {
    fill: #5AC;
    opacity: 0.6;
    }

    /* Text inside the groups the script tags with class "axis" (x and y axes). */
    .axis {
    font: 12px sans-serif;
    }

    /* Path and line elements inside the axis groups: thin, unfilled black strokes.
    * NOTE(review): presumably these are the domain path and tick lines emitted
    * by the d3 axis component - confirm against the rendered DOM.
    */
    .axis path,
    .axis line {
    fill: none;
    stroke: #000;
    shape-rendering: crispEdges;
    }

    /* Element with class "extent" inside the group the script tags with class "brush".
    * NOTE(review): presumably the selection rectangle drawn by the d3 brush - confirm.
    */
    .brush .extent {
    stroke: #fff;
    fill-opacity: .125;
    shape-rendering: crispEdges;
    }

    /* Bold captions the script appends to both axes with class "fname". */
    .fname {
    font: 12px sans-serif;
    font-weight: bold;
    }

    </style>
    <body>
    <script src="http://d3js.org/d3.v3.min.js"></script>
    <!--script src="http://localhost:1025/static/js/d3.v3.min.js"></script-->
    <script>


    /* Estimate how much of the population lies at or below a point.
     *
     * rawBins: bin objects carrying `mean`, `pop` and `exact`; non-exact bins
     *          also carry the neighbouring means as `prev` and `next`.
     * value:   the point up to which the population is accumulated.
     *
     * Returns the estimated (possibly fractional) population up to `value`.
     */
    function sum(rawBins, value) {
      // Share of one bin's population that falls at or below `value`.
      function portionBelow(entry) {
        // Exact bins count fully once reached; any bin counts fully once
        // `value` has moved past the next bin's mean.
        if ((entry.exact && entry.mean <= value) || value >= entry.next) {
          return entry.pop;
        }
        // `value` sits between the previous mean and this bin's mean:
        // interpolate linearly over the lower half of the population.
        if (entry.prev < value && entry.mean >= value) {
          var leftGap = (entry.mean - value) / (entry.mean - entry.prev);
          // Optionally, square the gap for trapezoidal interpolation
          return (1 - leftGap) * entry.pop / 2;
        }
        // `value` sits between this bin's mean and the next mean: the lower
        // half counts fully, the upper half is interpolated linearly.
        if (entry.mean < value && entry.next >= value) {
          var rightGap = (value - entry.mean) / (entry.next - entry.mean);
          // Optionally, square the gap for trapezoidal interpolation
          return entry.pop / 2 + rightGap * entry.pop / 2;
        }
        return 0;
      }

      var running = 0;
      for (var idx = 0; idx < rawBins.length; idx++) {
        running += portionBelow(rawBins[idx]);
      }
      return running;
    }

    /* Loads the distribution into a convenient format.
     *
     * data: a field description whose `summary` holds either `bins`
     *       (pairs of [mean, population], read together with
     *       `summary.minimum` and `summary.maximum`) or `counts`
     *       (pairs of [value, population]).
     *
     * Returns an array of {mean, pop, exact} objects. A bin is exact when it
     * holds a single instance or sits on the minimum/maximum; every other
     * bin also gets `prev` and `next`, the means of its neighbours (falling
     * back to the minimum/maximum at the ends of the list).
     */
    function loadRawBins(data) {
      var summary = data.summary;
      var bins = [];
      var sbins;
      var bin;
      var i;

      if ("bins" in summary) {
        sbins = summary.bins;
        // Kept local so the function does not leak `minimum`/`maximum` as
        // globals (and does not throw in strict mode).
        var minimum = summary.minimum;
        var maximum = summary.maximum;
        for (i = 0; i < sbins.length; i++) {
          bin = {mean: sbins[i][0], pop: sbins[i][1]};
          if (bin.pop === 1 || bin.mean === minimum || bin.mean === maximum) {
            bin.exact = true;
          } else {
            bin.exact = false;
            bin.prev = i > 0 ? sbins[i - 1][0] : minimum;
            bin.next = i < sbins.length - 1 ? sbins[i + 1][0] : maximum;
          }
          bins[i] = bin;
        }
      } else {
        // Without `bins` the summary lists exact value counts.
        sbins = summary.counts;
        for (i = 0; i < sbins.length; i++) {
          bins[i] = {mean: sbins[i][0], pop: sbins[i][1], exact: true};
        }
      }
      return bins;
    }

    /* Turn the raw distribution into equally wide bars for drawing.
     *
     * rawBins:     the distribution, as consumed by sum().
     * range:       [low, high] span to cover.
     * vizBinCount: how many bars to produce.
     *
     * Returns {bins, maxPop, range}: `bins` is a list of {range, pop}
     * objects, `maxPop` the largest rounded population among them and
     * `range` the array that was passed in.
     */
    function buildViz(rawBins, range, vizBinCount) {
      // A tiny offset below the lower bound makes the first bar behave
      // like a closed interval.
      var epsilon = 0.0000001;
      var step = (range[1] - range[0]) / vizBinCount;

      var bars = [];
      var tallest = 0;
      var lower = range[0] - epsilon;
      var lowerPop = sum(rawBins, lower);

      for (var k = 0; k < vizBinCount; k++) {
        var upper = lower + step;
        var upperPop = sum(rawBins, upper);
        // Population of a bar is the difference of two cumulative estimates.
        var barPop = Math.round(upperPop - lowerPop);
        tallest = Math.max(tallest, barPop);
        bars.push({range: [lower + epsilon, upper], pop: barPop});
        lower = upper;
        lowerPop = upperPop;
      }

      return {bins: bars, maxPop: tallest, range: range};
    }

    /* Return `term` with its first character upper-cased; the rest is untouched. */
    function capitalize(term) {
      var head = term.charAt(0);
      var tail = term.slice(1);
      return head.toUpperCase() + tail;
    }

    /* Narrow a range so that sparsely populated tails are cut off.
     *
     * fullRange: [low, high] span to start from.
     * rawBins:   the distribution, as consumed by sum().
     * buffer:    fraction of the total population that may be dropped at
     *            each end (e.g. 0.0075 for 0.75%).
     * segments:  number of equal steps used to probe the range.
     *
     * Returns a new [start, end] array that always lies within fullRange.
     * When there is no population or no usable segment count, fullRange is
     * returned unchanged (as a copy).
     */
    function trimRange(fullRange, rawBins, buffer, segments) {
      var low = fullRange[0];
      var high = fullRange[1];
      var i;

      var total = 0;
      for (i = 0; i < rawBins.length; i++) {
        total += rawBins[i].pop;
      }

      // Nothing to measure against, or no way to step through the range.
      if (!(total > 0) || !(segments > 0)) {
        return [low, high];
      }

      var interval = (high - low) / segments;

      // Walk up from the low end until more than `buffer` of the
      // population has been passed.
      var start = low;
      var startThreshold = total * buffer;
      for (i = 0; i < segments; i++) {
        if (sum(rawBins, start) > startThreshold) break;
        start += interval;
      }

      // Walk down from the high end until more than `buffer` of the
      // population has been left out.
      var end = high;
      var endThreshold = total - (total * buffer);
      for (i = 0; i < segments; i++) {
        if (sum(rawBins, end) < endThreshold) break;
        end -= interval;
      }

      // Step back one segment so the crossing point stays inside the result,
      // but never beyond the range we were asked to trim.
      return [Math.max(low, start - interval), Math.min(high, end + interval)];
    }

    // Chart geometry: the outer drawing is 960x500, the plot area is what
    // remains once the margins are taken off.
    var margin = {top: 30, right: 40, bottom: 30, left: 40};
    var width = 960 - margin.left - margin.right;
    var height = 500 - margin.top - margin.bottom;

    // `svg` ends up referring to the inner <g>, shifted by the left/top
    // margins, so everything appended to it uses plot-area coordinates.
    var svg = d3.select("body")
      .append("svg")
        .attr("width", width + margin.left + margin.right)
        .attr("height", height + margin.top + margin.bottom)
      .append("g")
        .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

    // Load the resource, draw the histogram of field '000004' and wire up the
    // brush: brushing a span zooms into it, a plain click restores the
    // initial range.
    d3.json('diabetes-000004.json'/*"plasma.json"*/, function(error, data) {

      // Report a failed load explicitly instead of failing later with a
      // TypeError on `data.model`.
      if (error || !data) {
        console.error("Could not load diabetes-000004.json", error);
        return;
      }

      data = data.model.fields['000004'];
      var rawBins = loadRawBins(data);
      // Scales, axis generators and axis groups are shared by init(),
      // update() and the brush handlers below.
      var x, y;
      var xAxis, yAxis;
      var xAxisG, yAxisG;
      var vizBinCount = 32;

      // Pad the full range by 1% of its width on both sides.
      var buffer = (data.summary.maximum - data.summary.minimum) * 0.01;
      var initRange = [data.summary.minimum - buffer, data.summary.maximum + buffer];

      // Remove to default to the entire range rather than trimming 0.75% from the edges
      initRange = trimRange(initRange, rawBins, 0.0075, 128);

      /* Create the scales, the bars, both axes and their captions. */
      function init() {
        var viz = buildViz(rawBins, initRange, vizBinCount);

        x = d3.scale.linear()
            .domain(viz.range)
            .range([0, width], .1);

        y = d3.scale.linear()
            .domain([0, viz.maxPop])
            .range([height, 0]);

        // One rect per visualization bin, with a 1px gap on either side.
        svg.selectAll(".bar")
            .data(viz.bins)
          .enter().append("rect")
            .attr("class", "bar")
            .attr("x", function(d) { return x(d.range[0]) + 1; })
            .attr("width", function(d) { return x(d.range[1]) - x(d.range[0]) - 2; })
            .attr("y", function(d) { return y(d.pop);})
            .attr("height", function(d) { return height - y(d.pop); });

        xAxis = d3.svg.axis()
            .ticks(Math.round(width / 60))
            .scale(x)
            .orient("bottom");

        yAxis = d3.svg.axis()
            .ticks(Math.round(height / 30))
            .scale(y)
            .orient("left");

        xAxisG = svg.append("g")
            .attr("class", "x axis")
            .attr("transform", "translate(0," + height + ")")
            .call(xAxis);

        // Caption of the x axis: the capitalized field name.
        xAxisG.append("text")
            .attr("class", "fname")
            .attr("y", -16)
            .attr("x", width)
            .attr("dy", ".71em")
            .style("text-anchor", "end")
            .text(capitalize(data.name));

        yAxisG = svg.append("g")
            .attr("class", "y axis")
            .call(yAxis);

        yAxisG.append("text")
            .attr("class", "fname")
            .attr("transform", "rotate(-90)")
            .attr("y", 6)
            .attr("dy", ".71em")
            .style("text-anchor", "end")
            .text("Population");
      }

      init();

      /* Re-bin the distribution for `range` and animate bars and axes to it.
       * Only bar heights change; the bars keep their horizontal positions.
       */
      function update(rawBins, range, vizBinCount) {
        var animationDuration = 700;

        var viz = buildViz(rawBins, range, vizBinCount);
        x = d3.scale.linear()
            .domain(viz.range)
            .range([0, width], .1);

        y = d3.scale.linear()
            .domain([0, viz.maxPop])
            .range([height, 0], .1);

        svg.selectAll(".bar")
            .data(viz.bins)
          .transition().duration(animationDuration)
            .attr("y", function(d) { return y(d.pop);})
            .attr("height", function(d) { return height - y(d.pop); });

        xAxis = d3.svg.axis()
            .ticks(Math.round(width / 60))
            .scale(x)
            .orient("bottom");

        yAxis = d3.svg.axis()
            .ticks(Math.round(height / 30))
            .scale(y)
            .orient("left");

        xAxisG.transition().duration(animationDuration).call(xAxis);
        yAxisG.transition().duration(animationDuration).call(yAxis);
      }

      // The brush works in pixel coordinates; brushended() maps its extent
      // back to data values through the current x scale.
      var brushX = d3.scale.identity().domain([0, width]);

      var brushExtent;
      var brush = d3.svg.brush()
          .x(brushX)
          .on("brush", brushed)
          .on("brushend", brushended);

      var gBrush = svg.append("g")
          .attr("class", "brush")
          .call(brush)
          .call(brush.event);

      gBrush.selectAll("rect")
          .attr("height", height);

      /* Remember the latest brush extent (in pixels). */
      function brushed() {
        brushExtent = brush.extent();
      }

      /* Zoom to the brushed span, or back to the initial range on a click. */
      function brushended() {
        if (!d3.event.sourceEvent) return; // only transition after input

        var start = x.invert(brushExtent[0]);
        var end = x.invert(brushExtent[1]);

        // Reset the brush selection before redrawing.
        d3.select(this).call(brush.extent([[0], [0]]));
        if (start == end) {
          // Zero-width extent, i.e. a click without dragging.
          update(rawBins, initRange, vizBinCount);
        } else {
          update(rawBins, [start, end], vizBinCount);
        }
      }

    });

    </script>