-
-
Save vunb/51b76988aa554adf6283ecf7c60aeaa3 to your computer and use it in GitHub Desktop.
Cloudflare Workers / Segment Smart Proxy — serve data collection assets and endpoints from your own domain
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Steps to use:
 * 1. Create CF Worker, copy and paste this in
 * 2. (Optional) Update configuration defaults
 *    - If you want to manage in code, do so below under "Static Configuration"
 *    - If you want dynamic custom config: Create CFW KV namespace, link them, and add reference below
 *
 *    - You can overwrite default path prefix for loading analytics.js (<yourdomain>/ajs)
 *      (corresponding KV entry: `script_path_prefix`)
 *    - You can overwrite default path prefix for handling first-party data collection (<yourdomain>/data)
 *      (corresponding KV entry: `collection_api_path_prefix`)
 *    - You can overwrite default cookie name for the edge-side anonymous ID
 *      (corresponding KV entry: `cookie_name`)
 *    - You can overwrite default integration list path prefix (<yourdomain>/ilist)
 *      (corresponding KV entry: `integration_list_path_prefix`)
 *    - You can overwrite the default refresh trigger if you want to more regularly update the anonymousId
 *      (corresponding KV entry: `refresh_threshold`)
 *    - You can set a path for echoing the anonymous ID
 *      (config entry: `ANONYMOUS_ID_ECHO_PATH` under "Static Configuration")
 *    - You can set a default write key if you just want to use one globally and want to omit it from your site code
 *      (corresponding KV entry: `default_write_key`)
 *    - You can set an error collection endpoint if you have a logging service that accepts webhooks
 *      (corresponding KV entry: `error_endpoint`)
 *
 * 3. (If needed) If you use it for Consent Management, update any conditional destination loading logic to pull the integration list from your host + integration list path prefix
 *    eg. If using Segment Consent Manager or https://gist.github.com/sperand-io/4725e248a35d5005d68d810d8a8f7b29
 *    ...instead of fetch(`https://cdn.segment.com/v1/projects/${writeKey}/integrations`)
 *    ...replace with fetch(`${location.origin}/ilist/${writeKey}`), or fetch(`${location.origin}/ilist`) if you have a default write key set
 * 4. (REQUIRED) Deploy and configure the worker to serve for your desired domain/subdomain and at your desired path
 * 5. (REQUIRED) Update your segment snippet to load from your host + script path prefix
 *    (eg find n.src="https://cdn.segment.com/analytics.js/v1/"+t+"/analytics.min.js" in snippet and ...)
 *    (replace with n.src=`${location.origin}/ajs` if you have a default write key set)
 *    (or with n.src=`${location.origin}/ajs/${t}` if not)
 */
/**
 * Optional Workers KV namespace used for dynamic configuration.
 * It is left unassigned here; while it is falsy the worker reads everything
 * from STATIC_CONFIG below.
 */
let KV_NAMESPACE

// START STATIC CONFIGURATION
const STATIC_CONFIG = {
  // Name of the cookie that carries the edge-side anonymous ID
  COOKIE_NAME: '__anonymous_session_id',
  // Requests under /<prefix> are answered with the rewritten analytics.js
  SCRIPT_PATH_PREFIX: 'ajs',
  // Requests under /<prefix> are forwarded to the Segment tracking API
  COLLECTION_API_PATH_PREFIX: 'data',
  // Requests under /<prefix> are forwarded to the Segment integration list
  INTEGRATION_LIST_PATH_PREFIX: 'ilist',
  // Path for echoing the anonymous ID; an empty string disables the route
  ANONYMOUS_ID_ECHO_PATH: '',
  // Number of days before expiry at which the anonymous ID cookie is reissued
  REFRESH_THRESHOLD: 45,
  // Write key used when the request path does not carry one (replace with your own)
  DEFAULT_WRITE_KEY: '3K4xZlUgQFAa3MRdnRRKvbvDEukDCWeu',
  // Webhook that receives error reports; an empty string disables reporting
  ERROR_ENDPOINT: 'https://enj0zt42hq1y.x.pipedream.net'
}
// END STATIC CONFIGURATION. Editing below this line is discouraged.
/**
 * Attach top-level responder.
 *
 * Every incoming fetch event is answered with whatever handleErr resolves to.
 */
addEventListener('fetch', event => {
  event.respondWith(handleErr(event))
})
/**
 * Top level event handler.
 *
 * Wraps our request handler in an error handler and optionally forwards
 * errors to a logging service. The logging endpoint is read from the KV
 * entry `error_endpoint` when a namespace is linked, otherwise from
 * STATIC_CONFIG.ERROR_ENDPOINT.
 *
 * @param {Event} event the fetch event
 * @returns {Promise<Response>} the handler's response, or an error response
 *   carrying the error message and its status code (500 when the error has no
 *   usable `statusCode`)
 */
async function handleErr(event) {
  try {
    return await handleEvent(event)
  } catch (err) {
    // Reporting is best effort: a failing KV lookup or logger must not
    // prevent us from answering the client.
    try {
      let endpoint = KV_NAMESPACE && (await KV_NAMESPACE.get('error_endpoint'))
      if (!endpoint) endpoint = STATIC_CONFIG['ERROR_ENDPOINT']
      if (endpoint) event.waitUntil(log(endpoint, err, event.request))
    } catch (reportingErr) {
      console.error(reportingErr) // eslint-disable-line no-console
    }

    // `new Response` throws on out-of-range or null-body statuses, so only
    // trust statusCode when it is a genuine 4xx/5xx error status.
    const requested = err && err.statusCode
    const status =
      Number.isInteger(requested) && requested >= 400 && requested <= 599
        ? requested
        : 500
    return new Response((err && err.message) || 'An error occurred!', { status })
  }
}
/**
 * Respond to the request
 *
 * Provides special handling for Segment requests against the configured ||
 * default paths and passes every other request through untouched.
 *
 * @param {Event} event the fetch event
 * @returns {Promise<Response>} response from the matching handler or from the origin
 */
async function handleEvent(event) {
  const config = KV_NAMESPACE ? await hydrateConfig(KV_NAMESPACE) : STATIC_CONFIG
  const {
    COOKIE_NAME,
    SCRIPT_PATH_PREFIX,
    COLLECTION_API_PATH_PREFIX,
    INTEGRATION_LIST_PATH_PREFIX,
    ANONYMOUS_ID_ECHO_PATH
  } = config
  const cache = caches.default
  const { request } = event
  const url = new URL(request.url)

  // extract cookie information
  const cookieData = getCookieData(request, COOKIE_NAME)

  // serve analytics.js
  if (startsWith(url, SCRIPT_PATH_PREFIX)) {
    return handleScript(event, cache, cookieData, config)
  }

  // serve first party data collection pings
  if (startsWith(url, COLLECTION_API_PATH_PREFIX)) {
    return handleDataCollection(request, cookieData, config)
  }

  // serve first party integration list
  if (startsWith(url, INTEGRATION_LIST_PATH_PREFIX)) {
    return handleIntegrationListing(request, config)
  }

  // serve anonymousId echo (only when an echo path is configured);
  // handleEcho is declared to take the request, not the event
  if (ANONYMOUS_ID_ECHO_PATH && startsWith(url, ANONYMOUS_ID_ECHO_PATH)) {
    return handleEcho(request, cookieData)
  }

  // passthrough everything else
  return fetch(request)
}
/**
 * Serve analytics.js
 *
 * Serves a modified analytics.js for (default || passed) writeKey at (default || configured) (path || path prefix)
 * Mods:
 *   If writeKey is omitted, get the default script
 *   Updates data collection api host in the script itself
 *   If needed, sets an HTTPOnly anonymous session cookie (and corresponding set-at cookie)
 *
 * Only successful upstream responses are rewritten and cached; any other
 * upstream status is relayed as-is.
 *
 * @param {Event} event
 * @param {Cache} cache
 * @param {Object} cookieData
 * @param {String} cookieData.anonymousId
 * @param {Date} cookieData.expires
 * @param {Object} config
 * @returns {Promise<Response>} the (possibly cached) script response
 */
async function handleScript(
  event,
  cache,
  { anonymousId, expires },
  {
    SCRIPT_PATH_PREFIX,
    DEFAULT_WRITE_KEY,
    COLLECTION_API_PATH_PREFIX,
    COOKIE_NAME,
    REFRESH_THRESHOLD
  }
) {
  const { request } = event
  const { pathname, hostname } = new URL(request.url)
  const [, pathWriteKey] = pathname.split(`/${SCRIPT_PATH_PREFIX}/`)
  const writeKey = pathWriteKey || DEFAULT_WRITE_KEY

  let response
  const cached = await cache.match(request)
  if (cached) {
    // Responses coming out of the cache have immutable headers; re-wrap so
    // the cookie headers below can be appended.
    response = new Response(cached.body, cached)
  } else {
    const endpoint = `https://cdn.segment.com/analytics.js/v1/${writeKey}/analytics.min.js`
    const upstream = await fetch(new Request(endpoint, request))
    if (upstream.ok) {
      const analyticsjs = await upstream.text()
      // point the library at our own first-party collection endpoint
      const modifiedAnalyticsjs = analyticsjs.replace(
        /api\.segment\.io\/v1/g,
        `${hostname}/${COLLECTION_API_PATH_PREFIX}`
      )
      // the consumed upstream response is only used for status and headers
      response = new Response(modifiedAnalyticsjs, upstream)
      // cache the cookie-free copy; cookies are added per request below
      event.waitUntil(cache.put(request, response.clone()))
    } else {
      // e.g. 304 or 404: relay untouched (a 304 cannot carry a rewritten body)
      response = new Response(upstream.body, upstream)
    }
  }

  if (!anonymousId || expiresSoon(expires, REFRESH_THRESHOLD)) {
    const oneYearFromNow = new Date()
    oneYearFromNow.setFullYear(oneYearFromNow.getFullYear() + 1)
    response.headers.append(
      'Set-Cookie',
      createCookie(COOKIE_NAME, uuid(), oneYearFromNow)
    )
    response.headers.append(
      'Set-Cookie',
      createCookie(`${COOKIE_NAME}_set`, oneYearFromNow.toUTCString(), oneYearFromNow)
    )
  }
  return response
}
/**
 * Serve first party data collection API
 *
 * Serves a handler to modify and forward events to Segment at the default || configured path prefix
 * Mods:
 *   If present in the request cookie, overwrites anonymousId with edge-side cookie value
 *
 * @param {Request} request incoming request; its body must be JSON
 * @param {Object} cookieData
 * @param {String} cookieData.anonymousId
 * @param {Object} config
 * @returns {Promise<Response>} the response from api.segment.io
 */
async function handleDataCollection(
  request,
  { anonymousId },
  { COLLECTION_API_PATH_PREFIX }
) {
  const { pathname, hostname } = new URL(request.url)
  const payload = await request.json()
  const body = JSON.stringify({
    ...payload,
    ...(anonymousId ? { anonymousId } : {})
  })

  // /<prefix>/<call> on our host maps to /v1/<call> on the Segment API
  const correctPath = pathname.replace(COLLECTION_API_PATH_PREFIX, 'v1')
  const newRequest = new Request(`https://api.segment.io${correctPath}`, {
    method: request.method,
    headers: request.headers,
    body,
    redirect: request.redirect
  })
  // `set`, not `append`: the incoming request may already carry an Origin
  // header and a duplicated, comma-joined Origin is not a valid value
  newRequest.headers.set('origin', `https://${hostname}`)
  return fetch(newRequest)
}
/**
 * Serve first party integration list API
 *
 * Serves a handler to passthrough list requests for default || passed writeKey at the default || configured path prefix
 *
 * @param {Request} request
 * @param {Object} config
 * @returns {Promise<Response>} the response from cdn.segment.com
 */
async function handleIntegrationListing(
  request,
  { INTEGRATION_LIST_PATH_PREFIX, DEFAULT_WRITE_KEY }
) {
  const { pathname } = new URL(request.url)
  const [, pathWriteKey] = pathname.split(`/${INTEGRATION_LIST_PATH_PREFIX}/`)
  const writeKey = pathWriteKey || DEFAULT_WRITE_KEY
  const endpoint = `https://cdn.segment.com/v1/projects/${writeKey}/integrations`
  // Forward method and headers unchanged. The previous code passed an
  // undefined `body` variable here, which threw on every call.
  return fetch(new Request(endpoint, request))
}
/**
 * Serve first party anonymousID echo API
 *
 * @param {Request} request incoming request (not inspected)
 * @param {Object} cookieData
 * @param {String} cookieData.anonymousId
 * @returns {Promise<Response>} JSON `{ anonymousId }`, or a 404 when no ID is known
 */
async function handleEcho(request, { anonymousId }) {
  if (!anonymousId) {
    // previously this response was constructed but never returned
    return new Response('No AnonymousId', { status: 404 })
  }
  return new Response(JSON.stringify({ anonymousId }), {
    headers: new Headers({
      'Content-Type': 'application/json'
    })
  })
}
/**
 * HELPERS
 */

/**
 * Build the effective configuration from a KV namespace.
 *
 * Reads every known key from KV and lets non-empty stored values override the
 * matching (upper-cased) STATIC_CONFIG entry. STATIC_CONFIG itself is left
 * untouched; a fresh object is returned.
 *
 * @param {NAMESPACE} KV namespace exposing an async `get(key)`
 * @returns {Promise<Object>} configuration with the same keys as STATIC_CONFIG
 */
async function hydrateConfig(KV) {
  const keys = [
    'cookie_name',
    'script_path_prefix',
    'collection_api_path_prefix',
    'integration_list_path_prefix',
    'anonymous_id_echo_path',
    'refresh_threshold',
    'default_write_key'
  ]
  const entries = await Promise.all(
    keys.map(async key => [key.toUpperCase(), await KV.get(key)])
  )

  const config = { ...STATIC_CONFIG }
  for (const [name, stored] of entries) {
    if (!stored) continue
    if (name === 'REFRESH_THRESHOLD') {
      // KV hands back strings, but the threshold is used in date arithmetic
      const days = Number(stored)
      if (Number.isFinite(days)) config[name] = days
      continue
    }
    config[name] = stored
  }
  return config
}
/**
 * Check if url path begins with a specified prefix
 *
 * The prefix must cover whole path segments: `data` matches `/data` and
 * `/data/t` but not `/database`, so unrelated site paths keep passing through.
 *
 * @param {URL} url parsed request URL
 * @param {String} prefix path prefix without the leading slash
 * @returns {Boolean}
 */
function startsWith(url, prefix) {
  const root = `/${prefix}`
  return url.pathname === root || url.pathname.startsWith(`${root}/`)
}
/**
 * Check if the anonymousId is due to be refreshed
 * (ie. is our expiration closer than our threshold window allows?)
 *
 * A missing or unparseable expiration counts as "expires soon".
 *
 * @param {Date} when expiration of the current anonymousId
 * @param {Number|String} REFRESH_THRESHOLD window in days (numeric strings are accepted)
 * @returns {Boolean}
 */
function expiresSoon(when, REFRESH_THRESHOLD) {
  const expiry = when == null ? NaN : new Date(when).getTime()
  if (Number.isNaN(expiry)) return true

  // Coerce first: `getDate() + '45'` would concatenate instead of add
  const days = Number(REFRESH_THRESHOLD)

  // eg. 45 days from now
  const threshold = new Date()
  threshold.setDate(threshold.getDate() + (Number.isFinite(days) ? days : 0))

  // is expiration in less than eg. 45 days?
  return expiry < threshold.getTime()
}
/**
 * Encode a cookie string suited for our use case
 *
 * The cookie is scoped to `Path=/`. Without it, a cookie set while serving
 * `/<script prefix>/<writeKey>` would default to that directory and never be
 * sent along with data collection requests.
 *
 * @param {String} name cookie name
 * @param {String} value cookie value
 * @param {Date} expires expiration date
 * @returns {String} value for a Set-Cookie header
 */
function createCookie(name, value, expires) {
  const pair = `${encodeURIComponent(name)}=${encodeURIComponent(value)}`
  const attributes = [
    `Expires=${expires.toUTCString()}`,
    'Path=/',
    'SameSite=Strict',
    'Secure',
    'HttpOnly'
  ]
  return `${pair}; ${attributes.join('; ')}`
}
/**
 * Generate a spec-compliant uuid-v4
 * adapted from: https://gist.github.com/bentranter/ed524091170137a72c1d54d641493c1f
 *
 * @returns {String} 36-character lowercase UUID (8-4-4-4-12 hex digits)
 */
function uuid() {
  const bytes = crypto.getRandomValues(new Uint8Array(16))
  bytes[6] = (bytes[6] & 0x0f) | 0x40 // version 4
  bytes[8] = (bytes[8] & 0x3f) | 0x80 // variant 10xx
  // every byte must render as exactly two hex digits, so pad values below 0x10
  const hex = Array.from(bytes, byte => byte.toString(16).padStart(2, '0')).join('')
  return [
    hex.slice(0, 8),
    hex.slice(8, 12),
    hex.slice(12, 16),
    hex.slice(16, 20),
    hex.slice(20)
  ].join('-')
}
/**
 * Grabs the anonymousId and expiration time from the cookies in the request header
 *
 * Adapted from: https://developers.cloudflare.com/workers/templates/pages/cookie_extract/
 *
 * @param {Request} request incoming Request
 * @param {string} name of the edge-side cookie
 * @returns {{anonymousId: (string|null), expires: (Date|null)}} values found in
 *   the `name` and `${name}_set` cookies; null when absent (or, for `expires`,
 *   when the stored date cannot be parsed)
 */
function getCookieData(request, name) {
  // cookie values are percent-encoded when set; tolerate malformed input
  const decode = raw => {
    try {
      return decodeURIComponent(raw)
    } catch (e) {
      return raw
    }
  }

  let anonymousId = null
  let expires = null
  const cookieString = request.headers.get('Cookie')
  if (!cookieString) return { anonymousId, expires }

  for (const pair of cookieString.split(';')) {
    // split on the first '=' only so values containing '=' stay intact
    const separator = pair.indexOf('=')
    if (separator === -1) continue
    const cookieName = pair.slice(0, separator).trim()
    const value = decode(pair.slice(separator + 1).trim())

    if (cookieName === name) {
      anonymousId = value
    } else if (cookieName === `${name}_set`) {
      const parsed = new Date(value)
      expires = Number.isNaN(parsed.getTime()) ? null : parsed
    }
  }
  return { anonymousId, expires }
}
/**
 * Ship the error with some helpful request context as JSON to the specified endpoint
 *
 * ADAPTED from https://github.com/bustle/cf-sentry/
 *
 * Reporting is best effort: delivery problems are written to the console and
 * never thrown, since this runs in the background via `waitUntil`.
 *
 * @param {String} endpoint
 * @param {Error} err the error
 * @param {Request} request incoming Request
 * @returns {Promise<void>}
 */
async function log(endpoint, err, request) {
  try {
    const body = JSON.stringify(errToJson(err, request))
    const res = await fetch(endpoint, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json'
      },
      body
    })
    // any 2xx counts as delivered (webhooks commonly answer 202/204)
    if (res.ok) return

    // We couldn't send to error endpoint, try to log the response at least.
    // Read it as text: error pages are frequently not JSON.
    console.error({ httpStatus: res.status, body: await res.text() }) // eslint-disable-line no-console
  } catch (sendErr) {
    console.error(sendErr) // eslint-disable-line no-console
  }
}
/**
 * Encode the parsed and formatted error as JSON
 *
 * ADAPTED from https://github.com/bustle/cf-sentry/
 *
 * @param {Error} err the error
 * @param {Request} request incoming Request
 * @returns {Object} JSON-serializable report: message, exception (type, value,
 *   stack frames), any extra enumerable error fields, and request context
 */
function errToJson(err, request) {
  const errType = err.name || (err.constructor || {}).name
  const frames = parse(err)
  const extraKeys = Object.keys(err).filter(
    key => !['name', 'message', 'stack'].includes(key)
  )
  const extraFields = {}
  for (const key of extraKeys) extraFields[key] = err[key]

  return {
    message: `${errType}: ${err.message || '<no message>'}`,
    exception: {
      values: [
        {
          type: errType,
          value: err.message,
          stacktrace: frames.length ? { frames: frames.reverse() } : undefined
        }
      ]
    },
    extra: extraKeys.length ? { [errType]: extraFields } : undefined,
    platform: 'worker',
    timestamp: Date.now() / 1000,
    request: requestToJson(request)
  }
}

/**
 * Reduce a request to plain, JSON-serializable context for error reports.
 *
 * @param {Request} request incoming Request (may be missing)
 * @returns {Object|undefined} undefined when there is no request URL
 */
function requestToJson(request) {
  if (!request || !request.url) return undefined

  // Request has no `query` property; derive the query string from the URL
  let queryString
  try {
    queryString = new URL(request.url).search.replace(/^\?/, '')
  } catch (e) {
    queryString = undefined
  }

  // A Headers instance stringifies to `{}`, so copy it into a plain object.
  // Credentials are masked because the report leaves our infrastructure.
  let headers = request.headers
  if (headers && typeof headers[Symbol.iterator] === 'function') {
    headers = {}
    for (const [key, value] of request.headers) {
      headers[key] = ['cookie', 'authorization'].includes(key.toLowerCase())
        ? '[redacted]'
        : value
    }
  }

  return {
    method: request.method,
    url: request.url,
    query_string: queryString,
    headers,
    // a body stream cannot be serialized synchronously; only keep plain strings
    data: typeof request.body === 'string' ? request.body : undefined
  }
}
/**
 * Parse errors.
 *
 * ADAPTED from https://github.com/bustle/cf-sentry/
 *
 * Turns each line of `err.stack` after the first (the message line) into a
 * frame object; lines that do not look like stack frames are dropped.
 *
 * @param {Error} err the error
 * @returns {Array<Object>} frames in stack order (innermost call first)
 */
function parse(err) {
  // From https://github.com/felixge/node-stack-trace/blob/1ec9ba43eece124526c273c917104b4226898932/lib/stack-trace.js#L42
  const framePattern = /at (?:(.+)\s+\()?(?:(.+?):(\d+)(?::(\d+))?|([^)]+))\)?/
  const separatorPattern = /^\s*[-]{4,}$/

  const frames = []
  const lines = (err.stack || '').split('\n').slice(1)
  for (const line of lines) {
    // a run of dashes separates stitched-together traces; keep it as a marker
    if (separatorPattern.test(line)) {
      frames.push({ filename: line })
      continue
    }
    const lineMatch = line.match(framePattern)
    if (!lineMatch) continue
    frames.push({
      function: lineMatch[1] || undefined,
      filename: lineMatch[2] || undefined,
      lineno: +lineMatch[3] || undefined,
      colno: +lineMatch[4] || undefined,
      in_app: lineMatch[5] !== 'native' || undefined
    })
  }
  return frames
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment