Created
November 9, 2019 00:26
-
-
Save saptarshiguha/5eebd2943c6ff0b25ef3eeb5c83773c0 to your computer and use it in GitHub Desktop.
c.sql
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
REATE OR REPLACE FUNCTION analysis.sg_histogram_aggregate(r ARRAY<STRUCT<v STRING>>) RETURNS Array<STRUCT<k STRING, v INT64>> | |
LANGUAGE js AS \"\"\" | |
// called as sg_histogram_aggregate(ARRAY_AGG(Struct( JSON_EXTRACT(A HIstogram,'$.values') as v))) | |
var arrayLength = r.length; | |
if( arrayLength == 0) { | |
return( [] ); | |
} | |
var d = 0; | |
var accum = { }; | |
for (var i = 0; i < arrayLength; i++) { | |
var o = r[i].v | |
if(o === null) { continue; } | |
o = JSON.parse(o); | |
for (var key in o) { | |
if( accum[ key ] === undefined){ | |
accum[ key ] = 0; | |
} | |
accum[ key] = accum[ key ] + parseInt(o[key],10) ; | |
} | |
} | |
if( Object.keys(accum).length == 0){ | |
return([]); | |
}else{ | |
var ot = []; | |
for( var key in accum){ | |
ot.push({'k': key, 'v': accum[key]}) | |
} | |
return(ot); | |
} | |
\"\"\"; | |
CREATE OR REPLACE FUNCTION analysis.sg_histogram_center(V Array<STRUCT<k STRING, v INT64>>,uselog BOOL) AS ( | |
-- computers the 'center' of a histogram or the geometric mean if uselog is true | |
-- expects as input the output of analysis.sg_histogram_aggregate | |
( | |
with a as ( | |
select | |
SUM(obj.v) as n, | |
case when uselog=false then sum(obj.v*CAST(obj.k as INT64)) | |
else sum(obj.v*log(1+CAST(obj.k as INT64)) ) end as s | |
from | |
unnest(V) as obj | |
) | |
select | |
case when n=0 then STRUCT(0 as m, 0 as n) | |
when uselog = false then STRUCT(exp(s/n)-1 as m, n as n) | |
else STRUCT(s/n as center, n as nobs) end | |
from a | |
) | |
); | |
CREATE OR REPLACE FUNCTION analysis.sg_prob_LE_c(V ARRAY<Struct<k STRING,v INT64>>,c FLOAT64,inv BOOL) AS ( | |
-- Prob(X<=c), if inv == True then 1-Prob(X<=c) == > Prob(X > c) | |
-- expects as input the output of analysis.sg_histogram_aggregate | |
( | |
with a as ( | |
select | |
sum(x.v) as denominator, | |
sum(case when cast(x.k as INT64) <= c then x.v else 0 end) as numerator | |
from unnest(V) as x | |
) | |
select | |
case when inv = TRUE then 1-numerator/denominator else numerator/denominator end | |
from a | |
) | |
); | |
CREATE OR REPLACE FUNCTION analysis.sg_prob_IN_c(V ARRAY<Struct<k STRING,v INT64>>,c ARRAY<INT64>) AS ( | |
-- Prob(X in {c}) | |
-- expects as input the output of analysis.sg_histogram_aggregate | |
( | |
with a as ( | |
select | |
sum(x.v) as denominator, | |
sum(case when cast(x.k as INT64) IN UNNEST(c) then x.v else 0 end) as numerator | |
from unnest(V) as x | |
) | |
select numerator/denominator | |
from a | |
) | |
); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment