Last active
March 16, 2022 10:04
-
-
Save ducaale/3dfb476630bea8c5cb155c168495a4da to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Given a list of sets [set1, set2, set3, ... setn], and a window with start and end such that (end - start) is constant. | |
// What is the fastest way to get all possible aggregates of the sets by moving start and end by 1 each time? | |
// | |
// example: | |
// const data = [set1, set2, set3, set4, set5, set6, set7, set8, set9]; | |
// const windowSize = 4 | |
// const [s, e] = [0, windowSize] | |
// | |
// in this case, our result should be: | |
// const result = [ | |
// set1 + set2 + set3 + set4, | |
// set2 + set3 + set4 + set5, | |
// set3 + set4 + set5 + set6, | |
// set4 + set5 + set6 + set7, | |
// set5 + set6 + set7 + set8, | |
// set6 + set7 + set8 + set9, | |
// ]; | |
// | |
// Note that addition and subtraction in this context means Set.union and Set.difference respectively. | |
// one way to optimize this is by subtracting set[s-1] and adding set[e] each time. For example, if we had | |
// (set1 + set2 + set3 + set4) in the first entry, our second entry could be equal to | |
// (set1 + set2 + set3 + set4) - set1 + set5. | |
// Unfortunately, I wasn't able to achieve this as it's hard to only subtract elements that are unique to set1 | |
// | |
// Another way to approach this is by partitioning our data into 2 parts and doing an scan operation on them (reduce that | |
// leaves intermediate results) on one part and doing scanBack on the other. | |
// | |
// part1 = [set1+set2+set3+set4, set1+set2+set3, set1+set2, set1] | |
// part2 = [set5, set5+set6, set5+set6+set7, set5+set6+set7+set8] | |
// | |
// at this point, we just need to add up the last item from part1 to the first item from part2 and so on... | |
// | |
// result = [(part1[0] + part2[3]), (part1[1] + part2[2]), (part1[2] + part2[1]), (part1[3] + part2[0])] | |
// | |
// as a result, we've reduced our union operation calls from 18 to 10! | |
const union = (setA, setB) => { | |
let _union = new Set(setA) | |
for (let elem of setB) { _union.add(elem) } | |
return _union | |
} | |
const last = arr => arr[arr.length-1] | |
const scan = (arr, fn) => | |
arr.reduce( | |
(accum, current) => | |
accum.length === 0 ? [current] : [...accum, fn(current, last(accum))], | |
[] | |
) | |
const scanBack = (arr, fn) => scan([...arr].reverse(), fn).reverse() | |
const partition = arr => [ | |
arr.slice(0, Math.floor(arr.length/2)), | |
arr.slice(Math.floor(arr.length/2)) | |
] | |
// naive approach | |
const getRolling1 = (data, windowSize, duration) => { | |
const rolling = [] | |
for (let i = (data.length - duration)+1; i <= data.length; i++) { | |
rolling.push( | |
data.slice(i-windowSize, i).reduce(union) | |
) | |
} | |
return rolling.slice(-duration) | |
} | |
// slightly less naive approach | |
const getRolling2 = (data, windowSize, duration) => { | |
let rolling = [] | |
for (let i = data.length; rolling.length < data.length - duration; i = i-(windowSize+1)) { | |
const rollingTemp = [] | |
const [b1, b2] = partition(data.slice(i-(windowSize*2), i)) | |
const temp1 = scanBack(b1, union) | |
const temp2 = scan(b2, union) | |
rollingTemp.push(temp1[0]) | |
for (let i = 1; i < temp1.length; i++) { | |
rollingTemp.push(union(temp1[i], temp2[i-1])) | |
} | |
rollingTemp.push(last(temp2)) | |
rolling = [...rollingTemp, ...rolling] | |
} | |
return rolling.slice(-duration) | |
} | |
const randInt = (min, max) => Math.floor(Math.random() * (max - min) + min); | |
const range = n => [...Array(n).keys()]; | |
const buckets = range(60).map(() => { | |
const bucket = new Set() | |
for (const i in range(10)) { bucket.add(randInt(0, 1000)) } | |
return bucket | |
}) | |
console.log(getRolling1(buckets, 7, 30).map(r => r.size)) | |
console.log(getRolling2(buckets, 7, 30).map(r => r.size)) | |
// getRolling1(buckets, 7, 30) // 180 union operations | |
// getRolling1(buckets, 30, 30) // 870 union operations | |
// getRolling2(buckets, 7, 30) // 72 union operations | |
// getRolling2(buckets, 30, 30) // 87 union operations |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment