Last active
January 4, 2016 03:58
-
-
Save jesslilly/8564976 to your computer and use it in GitHub Desktop.
Function to group an array of x,y data. Useful for charting. Requires underscore.js.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* @method | |
* @public | |
* @description Take an array of objects and convert to an array of pairs whose | |
* xCol are grouped and yCol values are aggregated somehow. If | |
* grouping by day or month, dates must be in ZULU format strings! | |
* Original implementation returned an object with keys = xCol and | |
* values = yCol. It worked great but js maps(objects) cannot be | |
* sorted! | |
* @param {array} | |
* constInput - array of x/y values. Can include extra fields. | |
* @param {string} | |
* xCol - name of x attribute to group by. | |
* @param {string} | |
* yCol - name of y attribute to aggregate by. | |
* @param {string} | |
* groupFunction - day, month, x. Date options "fill in" missing | |
* values. | |
* @param {string} | |
* aggFunction - max, sum, avg | |
* @return {object} output - array of x/y values. | |
* @throws {Error} - | |
* when groupFunction is not passed in. Because this returns an | |
* object with the keys being the group, the function MUST do | |
* grouping otherwise there would be duplicate keys in the object | |
* which is not possible. | |
*/ | |
function groupBy(constInput, xCol, yCol, groupFunction, aggFunction) { | |
// -------------------- | |
// variable declaration | |
// -------------------- | |
var output = []; | |
// helper map. Use to do the grouping and reference group to array index. | |
var hMap = {}; | |
// Just an obj to iterate over. | |
var obj; | |
var sample = []; | |
var input = []; | |
var agg; | |
var ds; // date string | |
var minInGroup = null; | |
var maxInGroup = null; | |
// -------------------- | |
// parameter validation | |
// -------------------- | |
if (!groupFunction || !groupFunction.match(/^x$|^day$|^month$/)) { | |
throw "The groupFunction [" + groupFunction + "] is invalid!"; | |
} | |
if (!groupFunction || !aggFunction.match(/^min$|^max$|^sum$|^avg$|^count$/)) { | |
throw "The aggFunction [" + aggFunction + "] is invalid!"; | |
} | |
if (constInput.length <= 0) { | |
return []; | |
} | |
var stoi = function(input) { | |
return (isNaN(input)) ? input : parseFloat(input, 10); | |
} | |
// ----------------------- | |
// variable initialization | |
// ----------------------- | |
var start = new Date(); | |
// ----------------------------------------------------- | |
// Copy the input to a new array so we can mess with it. | |
// ----------------------------------------------------- | |
input = _.map(constInput, function(row) { | |
// The new object only needs the 2 columns in question, not all of | |
// them. | |
obj = {}; | |
// If we are grouping by day or month, we can now reduce date precision | |
// and make the aggregation easier later. | |
if (groupFunction === "day") { | |
obj[xCol] = row[xCol].substring(0, 10);// + "T00:00:00.000Z"; | |
} else if (groupFunction === "month") { | |
obj[xCol] = row[xCol].substring(0, 7) + "-01";// T00:00:00.000Z"; | |
} else { | |
obj[xCol] = row[xCol]; | |
} | |
obj[yCol] = stoi(row[yCol]); | |
return obj; | |
}); | |
// ----------------------------------------------------- | |
// Rip nasty null data out of the input. | |
// ----------------------------------------------------- | |
input = _.filter(input, function(row) { | |
// false, null, 0, "", undefined and NaN | |
var x = row[xCol]; | |
var y = row[yCol]; | |
var good = (x !== null && x !== undefined) && (y !== null && y !== undefined && !isNaN(y)); | |
return good; | |
}); | |
// --------------------------------------------- | |
// Create the output object already pre-grouped. | |
// --------------------------------------------- | |
if (groupFunction === "x") { | |
_.each(input, function(row, idx) { | |
hMap[row[xCol]] = null; | |
}); | |
_.each(hMap, function(value, key) { | |
obj = {}; | |
obj[xCol] = key; | |
obj[yCol] = 0; | |
// Get the output ready with this x value. | |
output.push(obj); | |
hMap[key] = output.length - 1; | |
}); | |
} | |
if (groupFunction.match(/^day$|^month$/)) { | |
// normalize input such that all dates exist prior to grouping. | |
// First build an input structure that has all the dates we need. | |
// Also "pre-group" the dates. | |
// Dates are really special; Allow any sort for x value grouping, but | |
// dates | |
// really need to be sorted chonologically or it really doesn't look | |
// right. | |
input = _.sortBy(input, xCol); | |
// If input data is already sorted, we can get min and max easily. | |
var minInGroup = input[0][xCol]; | |
var maxInGroup = input[input.length - 1][xCol]; | |
console.log("minInGroup " + minInGroup); | |
console.log("maxInGroup " + maxInGroup); | |
if (groupFunction === "day") { | |
minInGroup = minInGroup.substring(0, 10);// + "T00:00:00.000Z"; | |
maxInGroup = maxInGroup.substring(0, 10);// + "T00:00:00.000Z"; | |
} | |
if (groupFunction === "month") { | |
minInGroup = minInGroup.substring(0, 7) + "-01";// T00:00:00.000Z"; | |
maxInGroup = maxInGroup.substring(0, 7) + "-01";// T00:00:00.000Z"; | |
} | |
// Simply fill in all dates in the range. | |
// Also reduce precision for day and month. | |
var date = new Date(minInGroup); | |
var maxDate = new Date(maxInGroup); | |
var iter = 0; // Break infinite loops. | |
while (date <= maxDate && iter++ < 10000000) { | |
obj = {}; | |
obj[xCol] = date.toISOString().substring(0, 10);// + | |
// "T00:00:00.000Z";; | |
obj[yCol] = 0; | |
// Get the output ready with this date. | |
output.push(obj); | |
// Get the hMap ready so we can quickly map the input into the | |
// output index. | |
hMap[obj[xCol]] = output.length - 1; | |
// Increment date. | |
// Awesome. Bad date increment code removed thanks to: | |
// http://stackoverflow.com/a/9989458/1804678 | |
if (groupFunction === "day") { | |
date.setUTCDate(date.getUTCDate() + 1); | |
} | |
if (groupFunction === "month") { | |
date.setUTCMonth(date.getUTCMonth() + 1); | |
} | |
} | |
} | |
console.log("hMap " + JSON.stringify(hMap).substring(0, 400) + "..."); | |
// ----------------------------- | |
// perform group and aggregation | |
// ----------------------------- | |
var max = function(row, idx) { | |
var groupIdx = hMap[row[xCol]]; | |
if (row[yCol] > output[groupIdx][yCol]) { | |
output[groupIdx][yCol] = row[yCol]; | |
} | |
}; | |
var min = function(row, idx) { | |
var groupIdx = hMap[row[xCol]]; | |
if (row[yCol] < output[groupIdx][yCol]) { | |
output[groupIdx][yCol] = row[yCol]; | |
} | |
}; | |
var sum = function(row, idx) { | |
// Performance testing shows no difference between this and an if/else. | |
var groupIdx = hMap[row[xCol]]; | |
if (groupIdx === undefined) { | |
console.log("hMap " + JSON.stringify(hMap).substring(0, 400) + "..."); | |
console.log("row[xCol] " + row[xCol]); | |
} | |
output[groupIdx][yCol] += row[yCol]; | |
}; | |
if (aggFunction === "avg") { | |
sample = new Array(output.length - 1); | |
for ( var idx = 0; idx < output.length; ++idx) { | |
sample[idx] = 0; | |
} | |
} | |
var avg = function(row, idx) { | |
var groupIdx = hMap[row[xCol]]; | |
output[groupIdx][yCol] += row[yCol]; | |
sample[groupIdx] += 1; | |
}; | |
var count = function(row, idx) { | |
var groupIdx = hMap[row[xCol]]; | |
output[groupIdx][yCol] += 1; | |
}; | |
// Use eval to assign the passed in function name to the function, agg. | |
eval("agg = " + aggFunction); | |
_.each(input, agg); | |
// ---------------------------- | |
// avg requires post-processing | |
// ---------------------------- | |
if (aggFunction === "avg") { | |
_.each(output, function(row, idx) { | |
row[yCol] = (sample[idx] === 0) ? 0 : row[yCol] / sample[idx]; | |
}); | |
} | |
var end = new Date(); | |
console.info("groupBy processed rows " + input.length + " rows in " + (end - start) + "ms."); | |
return output; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
describe('groupBy', function() { | |
beforeEach(module('myApp.controllers')); | |
var input = [ { | |
name : "Jess", | |
date : "2013-12-31T22:22:22.222Z", | |
age : 1 | |
}, { | |
name : "Jess", | |
date : "2013-12-31T11:11:11.111Z", | |
age : .5 | |
}, { | |
name : "Amit", | |
date : "2014-01-03T04:04:04.040Z", | |
age : 2 | |
} ]; | |
for ( var idx = 0; idx <= 100000; idx++) { | |
input.push({ | |
name : "Cher", | |
date : "2014-01-02T00:00:00.000Z", | |
age : idx | |
}); | |
} | |
var monthInput = _.map(input, _.clone); | |
monthInput[2].date = "2014-03-01T00:00:00.000Z"; | |
it('should throw an exception when groupFunction is bad.', function() { | |
expect(function() { | |
groupBy(input, "name", "age", "none", "max"); | |
}).toThrow(); | |
}); | |
it('should throw an exception when aggFunction is bad.', function() { | |
expect(function() { | |
groupBy(input, "name", "age", "x", "bad"); | |
}).toThrow(); | |
}); | |
it('should group by name value, showing max.', function() { | |
expect(groupBy(input, "name", "age", "x", "max")).toEqual([ { | |
name : "Jess", | |
age : 1 | |
}, { | |
name : "Amit", | |
age : 2 | |
}, { | |
name : "Cher", | |
age : 100000 | |
} ]); | |
}); | |
it('should group by name value, showing sum.', function() { | |
expect(groupBy(input, "name", "age", "x", "sum")).toEqual([ { | |
name : "Jess", | |
age : 1.5 | |
}, { | |
name : "Amit", | |
age : 2 | |
}, { | |
name : "Cher", | |
age : 5000050000 | |
} ]); | |
}); | |
it('should group by date/day value, showing sum, filling in days.', function() { | |
expect(groupBy(input, "date", "age", "day", "sum")).toEqual([ { | |
date : "2013-12-31", | |
age : 1.5 | |
}, { | |
date : "2014-01-01", | |
age : 0 | |
}, { | |
date : "2014-01-02", | |
age : 5000050000 | |
}, { | |
date : "2014-01-03", | |
age : 2 | |
} ]); | |
}); | |
it('should group by date/month value, showing avg, filling in months.', function() { | |
expect(groupBy(monthInput, "date", "age", "month", "avg")).toEqual([ { | |
date : "2013-12-01", | |
age : .75 | |
}, { | |
date : "2014-01-01", | |
age : 50000 | |
}, { | |
date : "2014-02-01", | |
age : 0 | |
}, { | |
date : "2014-03-01", | |
age : 2 | |
} ]); | |
}); | |
it('should handle blank, null and numbers as strings in input data.', function() { | |
expect(groupBy([ { | |
user : "jess", | |
age : null | |
}, { | |
user : null, | |
age : null | |
}, { | |
user : "jess", | |
age : "2222" | |
}, { | |
user : "jess", | |
age : undefined | |
}, { | |
user : "jess", | |
age : 0 | |
}, { | |
user : "jess", | |
age : '' | |
}, { | |
user : "jess", | |
age : "999" | |
}, { | |
user : "jess", | |
age : "99.99" | |
}, { | |
user : "jess", | |
age : "1" | |
}, { | |
user : null, | |
age : NaN | |
}, { | |
user : "jess", | |
age : ' ' | |
}, { | |
user : "amit", | |
age : '2222' | |
}, { | |
user : "amit", | |
age : "1" | |
}, { | |
user : null, | |
age : null | |
}, { | |
user : "amit", | |
age : '90' | |
} ], "user", "age", "x", "max")).toEqual([ { | |
user : "jess", | |
age : 2222 | |
}, { | |
user : "amit", | |
age : 2222 | |
} ]); | |
}); | |
}); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment