sibljon · April 29, 2026 23:13
diff --git a/example.php b/example.php
 <?php
 /**
 * Spruce attribution reporting for l.sprucehealth.com.
 *
 * Wired into the click-resolution flow so each click on a pre-blessed link
 * lands a row in the Spruce `attributions` Looker model.
 *
 * Spec: https://gist.github.com/sibljon/bfd93706bd215806458f89686527bb2a
 */

 /** GraphQL endpoint that receives the attribution mutation. */
 const SPRUCE_ATTRIBUTION_ENDPOINT = 'https://msg-api.sprucehealth.com/graphql';

 /** User-Agent header value sent with every attribution request. */
 const SPRUCE_ATTRIBUTION_USER_AGENT = 'l-sprucehealth-com/1.0';

 /** Limit, in seconds, applied to both the connect phase and the whole cURL transfer. */
 const SPRUCE_ATTRIBUTION_TIMEOUT_SECONDS = 5;

 /**
 * GraphQL document sent as the `query` field of the request body.
 * Declared as a nowdoc so `$input` is kept literally (no PHP interpolation).
 */
 const SPRUCE_ATTRIBUTION_QUERY = <<<'GQL'
 mutation AssociateAttribution($input: AssociateAttributionInput!) {
  associateAttribution(input: $input) {
    success
    errorCode
    errorMessage
  }
 }
 GQL;

 /**
 * POST associateAttribution to msg-api, then relay any Set-Cookie response
 * headers back to the user (so the backend's `did` cookie minting works).
 *
 * The request body carries one `values` entry for the URL without its query
 * string (key `url`), followed by one entry per query-string pair in the
 * order received. The URL host is sent as `origin` and the path as
 * `originDetails`.
 *
 * Failures are logged via error_log() and swallowed. Never throws. Never lets
 * an attribution problem affect the user-facing redirect.
 *
 * Call this BEFORE you call `header('Location: ...')` if the user has no
 * `did` cookie inbound (so we can relay the freshly-minted Set-Cookie).
 * Call it AFTER `fastcgi_finish_request()` if the user already has `did`
 * (fire-and-forget, no Set-Cookie needed).
 *
 * @param string $requestUrl   Full URL the user hit, e.g. "https://l.sprucehealth.com/x123?utm_source=newsletter".
 * @param string $cookieHeader Raw inbound Cookie header ($_SERVER['HTTP_COOKIE'] ?? '').
 */
 function spruceReportAttribution(string $requestUrl, string $cookieHeader): void {
    try {
        $parsed = parse_url($requestUrl);
        if (!is_array($parsed) || empty($parsed['host'])) {
            error_log('spruceReportAttribution: unparseable requestUrl');
            return;
        }
        $hostname  = $parsed['host'];
        $pathname  = $parsed['path'] ?? '/';
        $scheme    = $parsed['scheme'] ?? 'https';
        $urlValue  = $scheme . '://' . $hostname . $pathname;
        $rawQuery  = $parsed['query'] ?? '';

        // Build values: synthetic `url` first, then one entry per inbound query
        // param. Split the raw query string ourselves rather than using
        // parse_str(), which collapses repeated keys (the spec requires us to
        // emit one entry per occurrence).
        $values = [['key' => 'url', 'value' => $urlValue]];
        if ($rawQuery !== '') {
            foreach (explode('&', $rawQuery) as $pair) {
                if ($pair === '') {
                    // Skip empty segments produced by "a=1&&b=2" or a trailing "&".
                    continue;
                }
                $eq = strpos($pair, '=');
                if ($eq === false) {
                    // Bare key with no "=": report it with an empty value.
                    $values[] = ['key' => urldecode($pair), 'value' => ''];
                } else {
                    // Split on the FIRST "=" only, so values may contain "=".
                    $values[] = [
                        'key'   => urldecode(substr($pair, 0, $eq)),
                        'value' => urldecode(substr($pair, $eq + 1)),
                    ];
                }
            }
        }

        // operationName must match the operation name declared in
        // SPRUCE_ATTRIBUTION_QUERY exactly (names are case-sensitive); a
        // mismatch makes a spec-compliant server reject the request as an
        // unknown operation.
        //
        // JSON_INVALID_UTF8_SUBSTITUTE replaces invalid UTF-8 byte sequences
        // (e.g. a query value of "%FF" decoded to raw byte 0xFF) with U+FFFD
        // rather than failing the whole encode and dropping the row.
        $body = json_encode([
            'operationName' => 'AssociateAttribution',
            'query'         => SPRUCE_ATTRIBUTION_QUERY,
            'variables'     => [
                'input' => [
                    'values'        => $values,
                    'origin'        => $hostname,
                    'originDetails' => $pathname,
                ],
            ],
        ], JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE);

        if ($body === false) {
            error_log('spruceReportAttribution: json_encode failed: ' . json_last_error_msg());
            return;
        }

        $headers = [
            'Content-Type: application/json',
            'User-Agent: ' . SPRUCE_ATTRIBUTION_USER_AGENT,
        ];
        if ($cookieHeader !== '') {
            // Forward the caller's cookies verbatim to the backend.
            $headers[] = 'Cookie: ' . $cookieHeader;
        }

        $ch = curl_init(SPRUCE_ATTRIBUTION_ENDPOINT);
        if ($ch === false) {
            // Handle this explicitly instead of relying on the catch-all below.
            error_log('spruceReportAttribution: curl_init failed');
            return;
        }
        curl_setopt_array($ch, [
            CURLOPT_POST           => true,
            CURLOPT_POSTFIELDS     => $body,
            CURLOPT_HTTPHEADER     => $headers,
            CURLOPT_HEADER         => true,  // include response headers in output so we can read Set-Cookie
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT        => SPRUCE_ATTRIBUTION_TIMEOUT_SECONDS,
            CURLOPT_CONNECTTIMEOUT => SPRUCE_ATTRIBUTION_TIMEOUT_SECONDS,
            CURLOPT_FOLLOWLOCATION => false,
        ]);

        // Read everything we need from the handle before closing it.
        $response   = curl_exec($ch);
        $httpStatus = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $headerSize = (int) curl_getinfo($ch, CURLINFO_HEADER_SIZE);
        $curlErr    = curl_error($ch);
        curl_close($ch);

        if ($response === false) {
            error_log(sprintf(
                'spruceReportAttribution: transport error origin=%s details=%s err=%s',
                $hostname, $pathname, $curlErr
            ));
            return;
        }

        // With CURLOPT_HEADER on, the response is headers followed by body;
        // CURLINFO_HEADER_SIZE gives the split point.
        $rawHeaders = (string) substr($response, 0, $headerSize);
        $rawBody    = (string) substr($response, $headerSize);

        if ($httpStatus < 200 || $httpStatus >= 300) {
            error_log(sprintf(
                'spruceReportAttribution: http %d origin=%s details=%s body=%s',
                $httpStatus, $hostname, $pathname, substr($rawBody, 0, 500)
            ));
            // Don't relay Set-Cookie on a non-2xx response — those headers may
            // be from an error path we don't trust.
            return;
        }

        // Relay Set-Cookie response headers back to the user, but only if
        // we haven't already sent the user-facing response. In fire-and-forget
        // mode (after fastcgi_finish_request), headers are already sent, so
        // the relay is skipped entirely.
        if (!headers_sent()) {
            foreach (preg_split('/\r?\n/', $rawHeaders) as $line) {
                if (stripos($line, 'set-cookie:') === 0) {
                    // replace=false so several Set-Cookie headers can coexist.
                    header($line, false);
                }
            }
        }

        // Log GraphQL-layer failures (HTTP 200 with errors[] or success=false).
        $json = json_decode($rawBody, true);
        if (is_array($json)) {
            if (!empty($json['errors'])) {
                error_log(sprintf(
                    'spruceReportAttribution: graphql errors origin=%s details=%s errors=%s',
                    $hostname, $pathname, json_encode($json['errors'], JSON_UNESCAPED_SLASHES)
                ));
                return;
            }
            $payload = $json['data']['associateAttribution'] ?? null;
            if (is_array($payload) && ($payload['success'] ?? null) !== true) {
                error_log(sprintf(
                    'spruceReportAttribution: success=false origin=%s details=%s code=%s message=%s',
                    $hostname, $pathname,
                    $payload['errorCode']    ?? '',
                    $payload['errorMessage'] ?? ''
                ));
            }
        }
    } catch (\Throwable $t) {
        // Never let attribution problems affect the redirect.
        error_log('spruceReportAttribution: exception ' . $t->getMessage());
    }
 }

 /* --------------------------------------------------------------------------
 * Usage in the click-resolution handler.
 *
 * Adapt to your existing l.sprucehealth.com routing — this is the shape, not
 * a drop-in entry point.
 * -------------------------------------------------------------------------- */

 // $destination = lookupPreBlessedLink($_SERVER['REQUEST_URI']);  // your existing logic
 // if ($destination === null) {
 //     http_response_code(404);
 //     exit;
 // }

 // // Hardcode the host rather than using $_SERVER['HTTP_HOST'], which is
 // // attacker-controllable and would let a spoofed Host: header poison the
 // // attribution row's `origin` and `originDetails` fields.
 // $requestUrl   = 'https://l.sprucehealth.com' . $_SERVER['REQUEST_URI'];
 // $cookieHeader = $_SERVER['HTTP_COOKIE'] ?? '';
 // $hasDid       = isset($_COOKIE['did']) && $_COOKIE['did'] !== '';

 // if ($hasDid) {
 //     // Repeat click. Send the redirect first, then attribute in the
 //     // background so the user gets to their destination immediately.
 //     //
 //     // NOTE: after fastcgi_finish_request(), error_log() writes only land if
 //     // php.ini's `error_log` directive points at a file path. The default
 //     // SAPI logger is detached once the request is finalized, so failure logs
 //     // from this background task would be silently dropped. Verify ops has
 //     // `error_log = /var/log/php-attribution.log` (or similar) configured.
 //     header('Location: ' . $destination, true, 302);
 //     if (function_exists('fastcgi_finish_request')) {
 //         fastcgi_finish_request();
 //     }
 //     ignore_user_abort(true);
 //     spruceReportAttribution($requestUrl, $cookieHeader);
 //     exit;
 // }

 // // First click. The backend will mint a `did` cookie and Set-Cookie it; we
 // // need to relay that header to the user before redirecting, otherwise this
 // // first click is unattributable. Adds ~50–200 ms to the first click only.
 // spruceReportAttribution($requestUrl, $cookieHeader);
 // header('Location: ' . $destination, true, 302);
 // exit;
diff --git a/spec.md b/spec.md
	<?php
	/**
	* Spruce attribution reporting for l.sprucehealth.com.
	*
	* Wired into the click-resolution flow so each click on a pre-blessed link
	* lands a row in the Spruce `attributions` Looker model.
	*
	* Spec: https://gist.github.com/sibljon/bfd93706bd215806458f89686527bb2a
	*/

	/** GraphQL endpoint that receives the attribution mutation. */
	const SPRUCE_ATTRIBUTION_ENDPOINT        = 'https://msg-api.sprucehealth.com/graphql';

	/** User-Agent header value sent with every attribution request. */
	const SPRUCE_ATTRIBUTION_USER_AGENT      = 'l-sprucehealth-com/1.0';

	/** Limit, in seconds, applied to both the connect phase and the whole cURL transfer. */
	const SPRUCE_ATTRIBUTION_TIMEOUT_SECONDS = 5;

	/**
	* GraphQL document sent as the `query` field of the request body.
	* Declared as a nowdoc so `$input` is kept literally (no PHP interpolation).
	*/
	const SPRUCE_ATTRIBUTION_QUERY = <<<'GQL'
	mutation AssociateAttribution($input: AssociateAttributionInput!) {
	associateAttribution(input: $input) {
	success
	errorCode
	errorMessage
	}
	}
	GQL;

	/**
	* POST associateAttribution to msg-api, then relay any Set-Cookie response
	* headers back to the user (so the backend's `did` cookie minting works).
	*
	* The request body carries one `values` entry for the URL without its query
	* string (key `url`), followed by one entry per query-string pair in the
	* order received. The URL host is sent as `origin` and the path as
	* `originDetails`.
	*
	* Failures are logged via error_log() and swallowed. Never throws. Never lets
	* an attribution problem affect the user-facing redirect.
	*
	* Call this BEFORE you call `header('Location: ...')` if the user has no
	* `did` cookie inbound (so we can relay the freshly-minted Set-Cookie).
	* Call it AFTER `fastcgi_finish_request()` if the user already has `did`
	* (fire-and-forget, no Set-Cookie needed).
	*
	* @param string $requestUrl Full URL the user hit, e.g. "https://l.sprucehealth.com/x123?utm_source=newsletter".
	* @param string $cookieHeader Raw inbound Cookie header ($_SERVER['HTTP_COOKIE'] ?? '').
	*/
	function spruceReportAttribution(string $requestUrl, string $cookieHeader): void {
	    try {
	        $parsed = parse_url($requestUrl);
	        if (!is_array($parsed) || empty($parsed['host'])) {
	            error_log('spruceReportAttribution: unparseable requestUrl');
	            return;
	        }
	        $hostname = $parsed['host'];
	        $pathname = $parsed['path'] ?? '/';
	        $scheme = $parsed['scheme'] ?? 'https';
	        $urlValue = $scheme . '://' . $hostname . $pathname;
	        $rawQuery = $parsed['query'] ?? '';

	        // Build values: synthetic `url` first, then one entry per inbound query
	        // param. Split the raw query string ourselves rather than using
	        // parse_str(), which collapses repeated keys (the spec requires us to
	        // emit one entry per occurrence).
	        $values = [['key' => 'url', 'value' => $urlValue]];
	        if ($rawQuery !== '') {
	            foreach (explode('&', $rawQuery) as $pair) {
	                if ($pair === '') {
	                    // Skip empty segments produced by "a=1&&b=2" or a trailing "&".
	                    continue;
	                }
	                $eq = strpos($pair, '=');
	                if ($eq === false) {
	                    // Bare key with no "=": report it with an empty value.
	                    $values[] = ['key' => urldecode($pair), 'value' => ''];
	                } else {
	                    // Split on the FIRST "=" only, so values may contain "=".
	                    $values[] = [
	                        'key' => urldecode(substr($pair, 0, $eq)),
	                        'value' => urldecode(substr($pair, $eq + 1)),
	                    ];
	                }
	            }
	        }

	        // operationName must match the operation name declared in
	        // SPRUCE_ATTRIBUTION_QUERY exactly (names are case-sensitive); a
	        // mismatch makes a spec-compliant server reject the request as an
	        // unknown operation.
	        //
	        // JSON_INVALID_UTF8_SUBSTITUTE replaces invalid UTF-8 byte sequences
	        // (e.g. a query value of "%FF" decoded to raw byte 0xFF) with U+FFFD
	        // rather than failing the whole encode and dropping the row.
	        $body = json_encode([
	            'operationName' => 'AssociateAttribution',
	            'query' => SPRUCE_ATTRIBUTION_QUERY,
	            'variables' => [
	                'input' => [
	                    'values' => $values,
	                    'origin' => $hostname,
	                    'originDetails' => $pathname,
	                ],
	            ],
	        ], JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE);

	        if ($body === false) {
	            error_log('spruceReportAttribution: json_encode failed: ' . json_last_error_msg());
	            return;
	        }

	        $headers = [
	            'Content-Type: application/json',
	            'User-Agent: ' . SPRUCE_ATTRIBUTION_USER_AGENT,
	        ];
	        if ($cookieHeader !== '') {
	            // Forward the caller's cookies verbatim to the backend.
	            $headers[] = 'Cookie: ' . $cookieHeader;
	        }

	        $ch = curl_init(SPRUCE_ATTRIBUTION_ENDPOINT);
	        if ($ch === false) {
	            // Handle this explicitly instead of relying on the catch-all below.
	            error_log('spruceReportAttribution: curl_init failed');
	            return;
	        }
	        curl_setopt_array($ch, [
	            CURLOPT_POST => true,
	            CURLOPT_POSTFIELDS => $body,
	            CURLOPT_HTTPHEADER => $headers,
	            CURLOPT_HEADER => true, // include response headers in output so we can read Set-Cookie
	            CURLOPT_RETURNTRANSFER => true,
	            CURLOPT_TIMEOUT => SPRUCE_ATTRIBUTION_TIMEOUT_SECONDS,
	            CURLOPT_CONNECTTIMEOUT => SPRUCE_ATTRIBUTION_TIMEOUT_SECONDS,
	            CURLOPT_FOLLOWLOCATION => false,
	        ]);

	        // Read everything we need from the handle before closing it.
	        $response = curl_exec($ch);
	        $httpStatus = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
	        $headerSize = (int) curl_getinfo($ch, CURLINFO_HEADER_SIZE);
	        $curlErr = curl_error($ch);
	        curl_close($ch);

	        if ($response === false) {
	            error_log(sprintf(
	                'spruceReportAttribution: transport error origin=%s details=%s err=%s',
	                $hostname, $pathname, $curlErr
	            ));
	            return;
	        }

	        // With CURLOPT_HEADER on, the response is headers followed by body;
	        // CURLINFO_HEADER_SIZE gives the split point.
	        $rawHeaders = (string) substr($response, 0, $headerSize);
	        $rawBody = (string) substr($response, $headerSize);

	        if ($httpStatus < 200 || $httpStatus >= 300) {
	            error_log(sprintf(
	                'spruceReportAttribution: http %d origin=%s details=%s body=%s',
	                $httpStatus, $hostname, $pathname, substr($rawBody, 0, 500)
	            ));
	            // Don't relay Set-Cookie on a non-2xx response — those headers may
	            // be from an error path we don't trust.
	            return;
	        }

	        // Relay Set-Cookie response headers back to the user, but only if
	        // we haven't already sent the user-facing response. In fire-and-forget
	        // mode (after fastcgi_finish_request), headers are already sent, so
	        // the relay is skipped entirely.
	        if (!headers_sent()) {
	            foreach (preg_split('/\r?\n/', $rawHeaders) as $line) {
	                if (stripos($line, 'set-cookie:') === 0) {
	                    // replace=false so several Set-Cookie headers can coexist.
	                    header($line, false);
	                }
	            }
	        }

	        // Log GraphQL-layer failures (HTTP 200 with errors[] or success=false).
	        $json = json_decode($rawBody, true);
	        if (is_array($json)) {
	            if (!empty($json['errors'])) {
	                error_log(sprintf(
	                    'spruceReportAttribution: graphql errors origin=%s details=%s errors=%s',
	                    $hostname, $pathname, json_encode($json['errors'], JSON_UNESCAPED_SLASHES)
	                ));
	                return;
	            }
	            $payload = $json['data']['associateAttribution'] ?? null;
	            if (is_array($payload) && ($payload['success'] ?? null) !== true) {
	                error_log(sprintf(
	                    'spruceReportAttribution: success=false origin=%s details=%s code=%s message=%s',
	                    $hostname, $pathname,
	                    $payload['errorCode'] ?? '',
	                    $payload['errorMessage'] ?? ''
	                ));
	            }
	        }
	    } catch (\Throwable $t) {
	        // Never let attribution problems affect the redirect.
	        error_log('spruceReportAttribution: exception ' . $t->getMessage());
	    }
	}

	/* --------------------------------------------------------------------------
	* Usage in the click-resolution handler.
	*
	* Adapt to your existing l.sprucehealth.com routing — this is the shape, not
	* a drop-in entry point.
	* -------------------------------------------------------------------------- */

	// $destination = lookupPreBlessedLink($_SERVER['REQUEST_URI']); // your existing logic
	// if ($destination === null) {
	// http_response_code(404);
	// exit;
	// }

	// // Hardcode the host rather than using $_SERVER['HTTP_HOST'], which is
	// // attacker-controllable and would let a spoofed Host: header poison the
	// // attribution row's `origin` and `originDetails` fields.
	// $requestUrl = 'https://l.sprucehealth.com' . $_SERVER['REQUEST_URI'];
	// $cookieHeader = $_SERVER['HTTP_COOKIE'] ?? '';
	// $hasDid = isset($_COOKIE['did']) && $_COOKIE['did'] !== '';

	// if ($hasDid) {
	// // Repeat click. Send the redirect first, then attribute in the
	// // background so the user gets to their destination immediately.
	// //
	// // NOTE: after fastcgi_finish_request(), error_log() writes only land if
	// // php.ini's `error_log` directive points at a file path. The default
	// // SAPI logger is detached once the request is finalized, so failure logs
	// // from this background task would be silently dropped. Verify ops has
	// // `error_log = /var/log/php-attribution.log` (or similar) configured.
	// header('Location: ' . $destination, true, 302);
	// if (function_exists('fastcgi_finish_request')) {
	// fastcgi_finish_request();
	// }
	// ignore_user_abort(true);
	// spruceReportAttribution($requestUrl, $cookieHeader);
	// exit;
	// }

	// // First click. The backend will mint a `did` cookie and Set-Cookie it; we
	// // need to relay that header to the user before redirecting, otherwise this
	// // first click is unattributable. Adds ~50–200 ms to the first click only.
	// spruceReportAttribution($requestUrl, $cookieHeader);
	// header('Location: ' . $destination, true, 302);
	// exit;
No results found