bunkbail · February 23, 2026 02:03
diff --git a/FSR-Lite.glsl b/FSR-Lite.glsl
 // FSR-Lite v1.3
 //
 // A single-pass implementation of AMD FidelityFX Super Resolution.
 // Optimized and Tuned by bunkbail
 // Based on the mpv port by agyild and original code by AMD
 //
 // --- ARCHITECTURE OVERVIEW ---
 // FSR-Lite merges the two-pass FSR 1.0 pipeline (EASU + RCAS) into a single
 // shader while preserving virtually identical visual output.
 //
 // 1. Faithful EASU Replication:
 //    - Full 12-tap edge-adaptive Lanczos kernel with batched accumulation.
 //    - Identical direction/length analysis, kernel shaping, and deringing.
 //    - Uses the same fast math approximations (APrxLoRcpF1, APrxLoRsqF1).
 //    - Optional early exit for flat areas (bilinear fallback).
 //
 // 2. Source-Domain RCAS:
 //    - Original RCAS samples a 5-tap cross from the upscaled EASU output
 //      texture, which requires a second pass. FSR-Lite instead uses the 5
 //      bilinear sub-neighborhoods (lA..lE) already computed for EASU's
 //      gradient analysis as the RCAS ring. These form a smooth cross at
 //      ~1 source texel spacing and vary continuously with sub-pixel position.
 //    - The literal RCAS formula is applied: (e + lobe*(b+d+f+h)) / (1+4*lobe)
 //      with easu as center and lA,lB,lD,lE as the ring.
 //    - Adaptive lobe uses the same hitMin/hitMax math and APrxMedRcpF1
 //      reciprocal as the original, preserving RCAS's self-limiting behavior.
 //
 // 3. Gamma-Correct Processing:
 //    - SDR: Operates entirely in gamma space, matching the original FSR.
 //    - HDR: Applies x^4 linearization for PQ content, matching the original.
 //
 // --- COPYRIGHT & LICENSE ---
 // Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files (the "Software"), to deal
 // in the Software without restriction, including without limitation the rights
 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 // copies of the Software, and to permit persons to whom the Software is
 // furnished to do so, subject to the following conditions:
 // The above copyright notice and this permission notice shall be included in
 // all copies or substantial portions of the Software.
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 // THE SOFTWARE.

 //!HOOK LUMA
 //!BIND HOOKED
 //!DESC FSR-Lite v1.3 (EASU + RCAS)
 //!WHEN OUTPUT.w OUTPUT.h * LUMA.w LUMA.h * / 1.0 >
 //!WIDTH OUTPUT.w OUTPUT.w LUMA.w 2 * < * LUMA.w 2 * OUTPUT.w LUMA.w 2 * > * + OUTPUT.w OUTPUT.w LUMA.w 2 * = * +
 //!HEIGHT OUTPUT.h OUTPUT.h LUMA.h 2 * < * LUMA.h 2 * OUTPUT.h LUMA.h 2 * > * + OUTPUT.h OUTPUT.h LUMA.h 2 * = * +
 //!COMPONENTS 1

 // ============================================================================
 // TUNABLE PARAMETERS
 // ============================================================================

 // Sharpness (RCAS convention): 0.0 = maximum sharpening; each +1.0 halves the
 // sharpening strength (one stop). The shader clamps the value to [0.0, 2.0].
 // 0.2 matches the original FSR default.
 #define SHARPNESS 0.2

 // Whether the source content has PQ gamma. 0 = SDR, 1 = HDR (PQ).
 #define FSR_PQ 0

 // EASU deringing. Clamps output to center 2x2 min/max. 0 or 1.
 #define FSR_DERING 1

 // Early exit for flat areas (bilinear fallback). 0 or 1.
 #define FSR_QUIT_EARLY 1

 // RCAS denoise. Lessens sharpening on noisy areas. 0 or 1.
 #define FSR_RCAS_DENOISE 0

 // RCAS limit. Prevents unnatural over-sharpening.
 #define FSR_RCAS_LIMIT (0.25 - (1.0 / 16.0))

 // ============================================================================
 // FAST MATH
 // ============================================================================

 // Low-precision approximate reciprocal of `a`.
 // Subtracts the float's raw bit pattern from a magic constant and
 // reinterprets the result as a float; no refinement step is applied.
 float APrxLoRcpF1(float a) {
    uint bits = floatBitsToUint(a);
    return uintBitsToFloat(0x7ef07ebbu - bits);
 }

 // Low-precision approximate reciprocal square root of `a`.
 // Halves the float's raw bit pattern, subtracts it from a magic constant and
 // reinterprets the result as a float; no refinement step is applied.
 float APrxLoRsqF1(float a) {
    uint halved = floatBitsToUint(a) >> 1u;
    return uintBitsToFloat(0x5f347d74u - halved);
 }

 // Medium-precision approximate reciprocal of `a`.
 // Starts from a bit-trick estimate and refines it with one Newton-Raphson
 // step: b' = b * (2 - a * b).
 float APrxMedRcpF1(float a) {
    uint bits = floatBitsToUint(a);
    float b = uintBitsToFloat(0x7ef19fffu - bits);
    float correction = -b * a + 2.0;
    return b * correction;
 }

 // Minimum of three scalars.
 float AMin3F1(float x, float y, float z) {
    float yz = min(y, z);
    return min(x, yz);
 }
 // Maximum of three scalars.
 float AMax3F1(float x, float y, float z) {
    float yz = max(y, z);
    return max(x, yz);
 }

 // ============================================================================
 // PQ TRANSFORMS
 // ============================================================================

 #if (FSR_PQ == 1)
 // Raises `a` to the fourth power (the x^4 linearization used for PQ input).
 // Multiplication order matches ((a * a) * a) * a.
 float ToGamma2(float a) {
    float p = a * a;
    p *= a;
    p *= a;
    return p;
 }
 // Inverse of ToGamma2: fourth root of `a`, with negative inputs treated as 0.
 float FromGamma2(float a) {
    float nonneg = max(a, 0.0);
    float root2 = sqrt(nonneg);
    return sqrt(root2);
 }
 #endif

 // ============================================================================
 // EASU TAP (Lanczos-2 approximation)
 // ============================================================================

 // Accumulates one EASU tap into the running weighted sum.
 //
 //   aC  - running sum of weight * sample (updated in place)
 //   aW  - running sum of weights (updated in place)
 //   d2  - squared distance of the tap in the rotated/scaled kernel space
 //   lob - negative-lobe strength of the window term
 //   clp - upper bound applied to d2 before evaluating the kernel
 //   c   - luma sample of this tap
 void FsrEasuTap(
    inout float aC, inout float aW,
    float d2, float lob, float clp, float c)
 {
    // Limit the distance so the polynomial is only evaluated inside its support.
    float dist = min(d2, clp);

    // Base term: 0.25*d^2 - 1.25*d + 1, evaluated in Horner form.
    float base = (0.25 * dist - 1.25) * dist + 1.0;

    // Window term: (lob*d - 1)^2.
    float window = lob * dist - 1.0;
    window *= window;

    float weight = base * window;
    aC += c * weight;
    aW += weight;
 }

 // ============================================================================
 // MAIN
 // ============================================================================

 // Single-pass luma upscaler: EASU reconstruction followed by RCAS sharpening.
 //
 // Reads 12 source texels around the output position, derives an edge
 // direction/length from five bilinear sub-neighborhoods, accumulates a
 // direction-adaptive 12-tap kernel, optionally derings, and finally applies
 // the RCAS formula using the sub-neighborhoods lA/lB/lD/lE as the ring.
 //
 // Returns vec4(luma, 0, 0, 1) with luma clamped to [0, 1]. When FSR_PQ == 1
 // every return path converts the value back with FromGamma2.
 vec4 hook() {
    // Position in source texel units, shifted by half a texel so that integer
    // coordinates coincide with texel centers. fp is the base texel ('f'),
    // pp the fractional offset inside the f/g/j/k quad.
    vec2 pp = HOOKED_pos * HOOKED_size - vec2(0.5);
    vec2 fp = floor(pp);
    pp -= fp;

    // 12-tap fetch. Tap layout relative to fp:
    //      b c
    //    e f g h
    //    i j k l
    //      n o
    float bL, cL, eL, fL, gL, hL, iL, jL, kL, lL, nL, oL;

    #if (defined(HOOKED_gather) && (__VERSION__ >= 400 || (GL_ES && __VERSION__ >= 310)))
    // Four 2x2 gathers; each variable name spells out which tap ends up in
    // .x/.y/.z/.w ('z' marks a component that is not used).
    vec4 bczzL = HOOKED_gather(vec2((fp + vec2(1.0, -1.0)) * HOOKED_pt), 0);
    vec4 ijfeL = HOOKED_gather(vec2((fp + vec2(0.0,  1.0)) * HOOKED_pt), 0);
    vec4 klhgL = HOOKED_gather(vec2((fp + vec2(2.0,  1.0)) * HOOKED_pt), 0);
    vec4 zzonL = HOOKED_gather(vec2((fp + vec2(1.0,  3.0)) * HOOKED_pt), 0);

    bL = bczzL.x; cL = bczzL.y;
    iL = ijfeL.x; jL = ijfeL.y; fL = ijfeL.z; eL = ijfeL.w;
    kL = klhgL.x; lL = klhgL.y; hL = klhgL.z; gL = klhgL.w;
    oL = zzonL.z; nL = zzonL.w;
    #else
    // Fallback: one fetch per tap, addressed at texel centers.
    bL = HOOKED_tex(vec2((fp + vec2(0.5, -0.5)) * HOOKED_pt)).r;
    cL = HOOKED_tex(vec2((fp + vec2(1.5, -0.5)) * HOOKED_pt)).r;
    eL = HOOKED_tex(vec2((fp + vec2(-0.5, 0.5)) * HOOKED_pt)).r;
    fL = HOOKED_tex(vec2((fp + vec2( 0.5, 0.5)) * HOOKED_pt)).r;
    gL = HOOKED_tex(vec2((fp + vec2( 1.5, 0.5)) * HOOKED_pt)).r;
    hL = HOOKED_tex(vec2((fp + vec2( 2.5, 0.5)) * HOOKED_pt)).r;
    iL = HOOKED_tex(vec2((fp + vec2(-0.5, 1.5)) * HOOKED_pt)).r;
    jL = HOOKED_tex(vec2((fp + vec2( 0.5, 1.5)) * HOOKED_pt)).r;
    kL = HOOKED_tex(vec2((fp + vec2( 1.5, 1.5)) * HOOKED_pt)).r;
    lL = HOOKED_tex(vec2((fp + vec2( 2.5, 1.5)) * HOOKED_pt)).r;
    nL = HOOKED_tex(vec2((fp + vec2(0.5, 2.5))  * HOOKED_pt)).r;
    oL = HOOKED_tex(vec2((fp + vec2(1.5, 2.5))  * HOOKED_pt)).r;
    #endif

    #if (FSR_PQ == 1)
    // Linearize all taps (x^4); every return path below must undo this.
    bL = ToGamma2(bL); cL = ToGamma2(cL);
    eL = ToGamma2(eL); fL = ToGamma2(fL);
    gL = ToGamma2(gL); hL = ToGamma2(hL);
    iL = ToGamma2(iL); jL = ToGamma2(jL);
    kL = ToGamma2(kL); lL = ToGamma2(lL);
    nL = ToGamma2(nL); oL = ToGamma2(oL);
    #endif

    // Bilinear weights for a 2x2 quad ordered (top-left, top-right,
    // bottom-left, bottom-right).
    vec4 bw;
    bw.x = (1.0 - pp.x) * (1.0 - pp.y);
    bw.y =        pp.x   * (1.0 - pp.y);
    bw.z = (1.0 - pp.x)  *        pp.y;
    bw.w =        pp.x   *        pp.y;

    // 5 bilinear sub-neighborhoods (reused for EASU analysis and RCAS ring)
    //       lA
    //    lB lC lD
    //       lE
    // lC is the plain bilinear sample of the center quad f/g/j/k; the others
    // are the same interpolation shifted by one texel up/left/right/down.
    float lA = dot(bw, vec4(bL, cL, fL, gL));
    float lB = dot(bw, vec4(eL, fL, iL, jL));
    float lC = dot(bw, vec4(fL, gL, jL, kL));
    float lD = dot(bw, vec4(gL, hL, kL, lL));
    float lE = dot(bw, vec4(jL, kL, nL, oL));

    // Direction and length analysis (horizontal).
    float dc = lD - lC;
    float cb = lC - lB;
    float lenX = max(abs(dc), abs(cb));
    lenX = APrxLoRcpF1(lenX);
    float dirX = lD - lB;
    lenX = clamp(abs(dirX) * lenX, 0.0, 1.0);
    lenX *= lenX;

    // Direction and length analysis (vertical).
    float ec = lE - lC;
    float ca = lC - lA;
    float lenY = max(abs(ec), abs(ca));
    lenY = APrxLoRcpF1(lenY);
    float dirY = lE - lA;
    lenY = clamp(abs(dirY) * lenY, 0.0, 1.0);
    lenY *= lenY;

    float len = lenX + lenY;
    vec2 dir = vec2(dirX, dirY);

    // Normalize direction; zro flags a gradient too small to be trusted.
    vec2 dir2 = dir * dir;
    float dirR = dir2.x + dir2.y;
    bool zro = dirR < (1.0 / 64.0);
    dirR = APrxLoRsqF1(dirR);

    #if (FSR_QUIT_EARLY == 1)
    if (zro) {
        // Flat area: fall back to the bilinear sample of the center quad.
        float flatL = lC;
        #if (FSR_PQ == 1)
        // The taps were linearized above, so convert back exactly like the
        // main path does; otherwise flat regions are emitted in the x^4 domain.
        flatL = FromGamma2(flatL);
        #endif
        return vec4(clamp(flatL, 0.0, 1.0), 0.0, 0.0, 1.0);
    }
    #else
    dirR = zro ? 1.0 : dirR;
    dir.x = zro ? 1.0 : dir.x;
    #endif
    dir *= vec2(dirR);

    // Kernel shape: len in [0, 2] is remapped to [0, 1] and squared, then
    // drives the anisotropic stretch (len2) and the negative lobe (lob).
    len = len * 0.5;
    len *= len;
    float stretch = (dir.x * dir.x + dir.y * dir.y) * APrxLoRcpF1(max(abs(dir.x), abs(dir.y)));
    vec2 len2 = vec2(1.0 + (stretch - 1.0) * len, 1.0 + -0.5 * len);
    float lob = 0.5 + float((1.0 / 4.0 - 0.04) - 0.5) * len;
    float clp = APrxLoRcpF1(lob);

    // 12-tap Lanczos accumulation.
    // rs holds the rotation by dir combined with the per-axis scale len2, so a
    // tap offset (ox, oy) maps to (ox*rs.x + oy*rs.y, ox*rs.z + oy*rs.w).
    // ppRot is the mapped sub-pixel offset, subtracted from each mapped tap.
    vec4 rs = vec4(dir.x, dir.y, -dir.y, dir.x) * len2.xxyy;
    vec2 ppRot = vec2(pp.x * rs.x + pp.y * rs.y, pp.x * rs.z + pp.y * rs.w);
    float Sxy = rs.x + rs.y; float Dxy = rs.x - rs.y;
    float Szw = rs.z + rs.w; float Dzw = rs.z - rs.w;

    float aC = 0.0;
    float aW = 0.0;

    {
        // Taps b(0,-1), c(1,-1), i(-1,1), j(0,1).
        vec4 vX = vec4(-rs.y, Dxy, -Dxy, rs.y) - ppRot.x;
        vec4 vY = vec4(-rs.w, Dzw, -Dzw, rs.w) - ppRot.y;
        vec4 d2 = vX * vX + vY * vY;
        FsrEasuTap(aC, aW, d2.x, lob, clp, bL);
        FsrEasuTap(aC, aW, d2.y, lob, clp, cL);
        FsrEasuTap(aC, aW, d2.z, lob, clp, iL);
        FsrEasuTap(aC, aW, d2.w, lob, clp, jL);
    }
    {
        // Taps f(0,0), e(-1,0), k(1,1), l(2,1).
        vec4 vX = vec4(0.0, -rs.x, Sxy, rs.x + Sxy) - ppRot.x;
        vec4 vY = vec4(0.0, -rs.z, Szw, rs.z + Szw) - ppRot.y;
        vec4 d2 = vX * vX + vY * vY;
        FsrEasuTap(aC, aW, d2.x, lob, clp, fL);
        FsrEasuTap(aC, aW, d2.y, lob, clp, eL);
        FsrEasuTap(aC, aW, d2.z, lob, clp, kL);
        FsrEasuTap(aC, aW, d2.w, lob, clp, lL);
    }
    {
        // Taps h(2,0), g(1,0), o(1,2), n(0,2).
        vec4 vX = vec4(2.0 * rs.x, rs.x, rs.x + 2.0 * rs.y, 2.0 * rs.y) - ppRot.x;
        vec4 vY = vec4(2.0 * rs.z, rs.z, rs.z + 2.0 * rs.w, 2.0 * rs.w) - ppRot.y;
        vec4 d2 = vX * vX + vY * vY;
        FsrEasuTap(aC, aW, d2.x, lob, clp, hL);
        FsrEasuTap(aC, aW, d2.y, lob, clp, gL);
        FsrEasuTap(aC, aW, d2.z, lob, clp, oL);
        FsrEasuTap(aC, aW, d2.w, lob, clp, nL);
    }

    // Normalize; the small epsilon keeps the division finite if aW is 0.
    float easu = aC / (aW + 1.0e-5);

    // Deringing: limit the result to the range of the center quad f/g/j/k.
    #if (FSR_DERING == 1)
    float mn1 = min(AMin3F1(fL, gL, jL), kL);
    float mx1 = max(AMax3F1(fL, gL, jL), kL);
    easu = clamp(easu, mn1, mx1);
    #endif
    easu = clamp(easu, 0.0, 1.0);

    // RCAS (source-domain ring: lA, lB, lD, lE)
    float mn_ring = min(AMin3F1(lA, lB, lD), lE);
    float mx_ring = max(AMax3F1(lA, lB, lD), lE);

    // Lobe limits derived from the headroom towards 0 (hitMin) and 1 (hitMax).
    float hitMinL = min(mn_ring, easu) / (4.0 * mx_ring);
    float hitMaxL = (1.0 - max(mx_ring, easu)) / (4.0 * mn_ring - 4.0);
    float lobeL = max(-hitMinL, hitMaxL);
    // Lobe is kept in [-FSR_RCAS_LIMIT, 0] and attenuated by 2^-SHARPNESS.
    float rcas_lobe = max(float(-FSR_RCAS_LIMIT), min(lobeL, 0.0)) * exp2(-clamp(float(SHARPNESS), 0.0, 2.0));

    #if (FSR_RCAS_DENOISE == 1)
    // Scale the lobe down (to as little as half) where the center deviates
    // strongly from the ring average relative to the local range.
    float nz = 0.25 * (lA + lB + lD + lE) - easu;
    nz = clamp(abs(nz) * APrxMedRcpF1(
        AMax3F1(AMax3F1(lA, lB, easu), lD, lE) -
        AMin3F1(AMin3F1(lA, lB, easu), lD, lE)), 0.0, 1.0);
    nz = -0.5 * nz + 1.0;
    rcas_lobe *= nz;
    #endif

    // (easu + lobe * ring_sum) / (1 + 4 * lobe)
    float rcpL = APrxMedRcpF1(4.0 * rcas_lobe + 1.0);
    float res = (rcas_lobe * (lA + lB + lD + lE) + easu) * rcpL;

    #if (FSR_PQ == 1)
    res = FromGamma2(res);
    #endif

    return vec4(clamp(res, 0.0, 1.0), 0.0, 0.0, 1.0);
 }
	// FSR-Lite v1.3
	//
	// A single-pass implementation of AMD FidelityFX Super Resolution.
	// Optimized and Tuned by bunkbail
	// Based on the mpv port by agyild and original code by AMD
	//
	// --- ARCHITECTURE OVERVIEW ---
	// FSR-Lite merges the two-pass FSR 1.0 pipeline (EASU + RCAS) into a single
	// shader while preserving virtually identical visual output.
	//
	// 1. Faithful EASU Replication:
	// - Full 12-tap edge-adaptive Lanczos kernel with batched accumulation.
	// - Identical direction/length analysis, kernel shaping, and deringing.
	// - Uses the same fast math approximations (APrxLoRcpF1, APrxLoRsqF1).
	// - Optional early exit for flat areas (bilinear fallback).
	//
	// 2. Source-Domain RCAS:
	// - Original RCAS samples a 5-tap cross from the upscaled EASU output
	// texture, which requires a second pass. FSR-Lite instead uses the 5
	// bilinear sub-neighborhoods (lA..lE) already computed for EASU's
	// gradient analysis as the RCAS ring. These form a smooth cross at
	// ~1 source texel spacing and vary continuously with sub-pixel position.
	// - The literal RCAS formula is applied: (e + lobe*(b+d+f+h)) / (1+4*lobe)
	// with easu as center and lA,lB,lD,lE as the ring.
	// - Adaptive lobe uses the same hitMin/hitMax math and APrxMedRcpF1
	// reciprocal as the original, preserving RCAS's self-limiting behavior.
	//
	// 3. Gamma-Correct Processing:
	// - SDR: Operates entirely in gamma space, matching the original FSR.
	// - HDR: Applies x^4 linearization for PQ content, matching the original.
	//
	// --- COPYRIGHT & LICENSE ---
	// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
	// Permission is hereby granted, free of charge, to any person obtaining a copy
	// of this software and associated documentation files (the "Software"), to deal
	// in the Software without restriction, including without limitation the rights
	// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	// copies of the Software, and to permit persons to whom the Software is
	// furnished to do so, subject to the following conditions:
	// The above copyright notice and this permission notice shall be included in
	// all copies or substantial portions of the Software.
	// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
	// THE SOFTWARE.

	//!HOOK LUMA
	//!BIND HOOKED
	//!DESC FSR-Lite v1.3 (EASU + RCAS)
	//!WHEN OUTPUT.w OUTPUT.h * LUMA.w LUMA.h * / 1.0 >
	//!WIDTH OUTPUT.w OUTPUT.w LUMA.w 2 * < * LUMA.w 2 * OUTPUT.w LUMA.w 2 * > * + OUTPUT.w OUTPUT.w LUMA.w 2 * = * +
	//!HEIGHT OUTPUT.h OUTPUT.h LUMA.h 2 * < * LUMA.h 2 * OUTPUT.h LUMA.h 2 * > * + OUTPUT.h OUTPUT.h LUMA.h 2 * = * +
	//!COMPONENTS 1

	// ============================================================================
	// TUNABLE PARAMETERS
	// ============================================================================

	// Sharpness (RCAS convention: 0.0 = maximum, higher = less sharp in stops).
	// 0.2 matches the original FSR default.
	#define SHARPNESS 0.2

	// Whether the source content has PQ gamma. 0 = SDR, 1 = HDR (PQ).
	#define FSR_PQ 0

	// EASU deringing. Clamps output to center 2x2 min/max. 0 or 1.
	#define FSR_DERING 1

	// Early exit for flat areas (bilinear fallback). 0 or 1.
	#define FSR_QUIT_EARLY 1

	// RCAS denoise. Lessens sharpening on noisy areas. 0 or 1.
	#define FSR_RCAS_DENOISE 0

	// RCAS limit. Prevents unnatural over-sharpening.
	#define FSR_RCAS_LIMIT (0.25 - (1.0 / 16.0))

	// ============================================================================
	// FAST MATH
	// ============================================================================

	// Low-precision approximate reciprocal of `a`.
	// Subtracts the float's raw bit pattern from a magic constant and
	// reinterprets the result as a float; no refinement step is applied.
	float APrxLoRcpF1(float a) {
	    uint bits = floatBitsToUint(a);
	    return uintBitsToFloat(0x7ef07ebbu - bits);
	}

	// Low-precision approximate reciprocal square root of `a`.
	// Halves the float's raw bit pattern, subtracts it from a magic constant and
	// reinterprets the result as a float; no refinement step is applied.
	float APrxLoRsqF1(float a) {
	    uint halved = floatBitsToUint(a) >> 1u;
	    return uintBitsToFloat(0x5f347d74u - halved);
	}

	// Medium-precision approximate reciprocal of `a`.
	// Starts from a bit-trick estimate and refines it with one Newton-Raphson
	// step: b' = b * (2 - a * b).
	float APrxMedRcpF1(float a) {
	    uint bits = floatBitsToUint(a);
	    float b = uintBitsToFloat(0x7ef19fffu - bits);
	    float correction = -b * a + 2.0;
	    return b * correction;
	}

	// Minimum of three scalars.
	float AMin3F1(float x, float y, float z) {
	    float yz = min(y, z);
	    return min(x, yz);
	}
	// Maximum of three scalars.
	float AMax3F1(float x, float y, float z) {
	    float yz = max(y, z);
	    return max(x, yz);
	}

	// ============================================================================
	// PQ TRANSFORMS
	// ============================================================================

	#if (FSR_PQ == 1)
	// Raises `a` to the fourth power (the x^4 linearization used for PQ input).
	// Multiplication order matches ((a * a) * a) * a.
	float ToGamma2(float a) {
	    float p = a * a;
	    p *= a;
	    p *= a;
	    return p;
	}
	// Inverse of ToGamma2: fourth root of `a`, with negative inputs treated as 0.
	float FromGamma2(float a) {
	    float nonneg = max(a, 0.0);
	    float root2 = sqrt(nonneg);
	    return sqrt(root2);
	}
	#endif

	// ============================================================================
	// EASU TAP (Lanczos-2 approximation)
	// ============================================================================

	// Accumulates one EASU tap into the running weighted sum.
	//
	//   aC  - running sum of weight * sample (updated in place)
	//   aW  - running sum of weights (updated in place)
	//   d2  - squared distance of the tap in the rotated/scaled kernel space
	//   lob - negative-lobe strength of the window term
	//   clp - upper bound applied to d2 before evaluating the kernel
	//   c   - luma sample of this tap
	void FsrEasuTap(
	    inout float aC, inout float aW,
	    float d2, float lob, float clp, float c)
	{
	    // Limit the distance so the polynomial is only evaluated inside its support.
	    float dist = min(d2, clp);

	    // Base term: 0.25*d^2 - 1.25*d + 1, evaluated in Horner form.
	    float base = (0.25 * dist - 1.25) * dist + 1.0;

	    // Window term: (lob*d - 1)^2.
	    float window = lob * dist - 1.0;
	    window *= window;

	    float weight = base * window;
	    aC += c * weight;
	    aW += weight;
	}

	// ============================================================================
	// MAIN
	// ============================================================================

	// Single-pass luma upscaler: EASU reconstruction followed by RCAS sharpening.
	//
	// Reads 12 source texels around the output position, derives an edge
	// direction/length from five bilinear sub-neighborhoods, accumulates a
	// direction-adaptive 12-tap kernel, optionally derings, and finally applies
	// the RCAS formula using the sub-neighborhoods lA/lB/lD/lE as the ring.
	//
	// Returns vec4(luma, 0, 0, 1) with luma clamped to [0, 1]. When FSR_PQ == 1
	// every return path converts the value back with FromGamma2.
	vec4 hook() {
	    // Position in source texel units, shifted by half a texel so that integer
	    // coordinates coincide with texel centers. fp is the base texel ('f'),
	    // pp the fractional offset inside the f/g/j/k quad.
	    vec2 pp = HOOKED_pos * HOOKED_size - vec2(0.5);
	    vec2 fp = floor(pp);
	    pp -= fp;

	    // 12-tap fetch. Tap layout relative to fp:
	    //      b c
	    //    e f g h
	    //    i j k l
	    //      n o
	    float bL, cL, eL, fL, gL, hL, iL, jL, kL, lL, nL, oL;

	    #if (defined(HOOKED_gather) && (__VERSION__ >= 400 || (GL_ES && __VERSION__ >= 310)))
	    // Four 2x2 gathers; each variable name spells out which tap ends up in
	    // .x/.y/.z/.w ('z' marks a component that is not used).
	    vec4 bczzL = HOOKED_gather(vec2((fp + vec2(1.0, -1.0)) * HOOKED_pt), 0);
	    vec4 ijfeL = HOOKED_gather(vec2((fp + vec2(0.0, 1.0)) * HOOKED_pt), 0);
	    vec4 klhgL = HOOKED_gather(vec2((fp + vec2(2.0, 1.0)) * HOOKED_pt), 0);
	    vec4 zzonL = HOOKED_gather(vec2((fp + vec2(1.0, 3.0)) * HOOKED_pt), 0);

	    bL = bczzL.x; cL = bczzL.y;
	    iL = ijfeL.x; jL = ijfeL.y; fL = ijfeL.z; eL = ijfeL.w;
	    kL = klhgL.x; lL = klhgL.y; hL = klhgL.z; gL = klhgL.w;
	    oL = zzonL.z; nL = zzonL.w;
	    #else
	    // Fallback: one fetch per tap, addressed at texel centers.
	    bL = HOOKED_tex(vec2((fp + vec2(0.5, -0.5)) * HOOKED_pt)).r;
	    cL = HOOKED_tex(vec2((fp + vec2(1.5, -0.5)) * HOOKED_pt)).r;
	    eL = HOOKED_tex(vec2((fp + vec2(-0.5, 0.5)) * HOOKED_pt)).r;
	    fL = HOOKED_tex(vec2((fp + vec2( 0.5, 0.5)) * HOOKED_pt)).r;
	    gL = HOOKED_tex(vec2((fp + vec2( 1.5, 0.5)) * HOOKED_pt)).r;
	    hL = HOOKED_tex(vec2((fp + vec2( 2.5, 0.5)) * HOOKED_pt)).r;
	    iL = HOOKED_tex(vec2((fp + vec2(-0.5, 1.5)) * HOOKED_pt)).r;
	    jL = HOOKED_tex(vec2((fp + vec2( 0.5, 1.5)) * HOOKED_pt)).r;
	    kL = HOOKED_tex(vec2((fp + vec2( 1.5, 1.5)) * HOOKED_pt)).r;
	    lL = HOOKED_tex(vec2((fp + vec2( 2.5, 1.5)) * HOOKED_pt)).r;
	    nL = HOOKED_tex(vec2((fp + vec2(0.5, 2.5)) * HOOKED_pt)).r;
	    oL = HOOKED_tex(vec2((fp + vec2(1.5, 2.5)) * HOOKED_pt)).r;
	    #endif

	    #if (FSR_PQ == 1)
	    // Linearize all taps (x^4); every return path below must undo this.
	    bL = ToGamma2(bL); cL = ToGamma2(cL);
	    eL = ToGamma2(eL); fL = ToGamma2(fL);
	    gL = ToGamma2(gL); hL = ToGamma2(hL);
	    iL = ToGamma2(iL); jL = ToGamma2(jL);
	    kL = ToGamma2(kL); lL = ToGamma2(lL);
	    nL = ToGamma2(nL); oL = ToGamma2(oL);
	    #endif

	    // Bilinear weights for a 2x2 quad ordered (top-left, top-right,
	    // bottom-left, bottom-right).
	    vec4 bw;
	    bw.x = (1.0 - pp.x) * (1.0 - pp.y);
	    bw.y = pp.x * (1.0 - pp.y);
	    bw.z = (1.0 - pp.x) * pp.y;
	    bw.w = pp.x * pp.y;

	    // 5 bilinear sub-neighborhoods (reused for EASU analysis and RCAS ring)
	    //       lA
	    //    lB lC lD
	    //       lE
	    // lC is the plain bilinear sample of the center quad f/g/j/k; the others
	    // are the same interpolation shifted by one texel up/left/right/down.
	    float lA = dot(bw, vec4(bL, cL, fL, gL));
	    float lB = dot(bw, vec4(eL, fL, iL, jL));
	    float lC = dot(bw, vec4(fL, gL, jL, kL));
	    float lD = dot(bw, vec4(gL, hL, kL, lL));
	    float lE = dot(bw, vec4(jL, kL, nL, oL));

	    // Direction and length analysis (horizontal).
	    float dc = lD - lC;
	    float cb = lC - lB;
	    float lenX = max(abs(dc), abs(cb));
	    lenX = APrxLoRcpF1(lenX);
	    float dirX = lD - lB;
	    lenX = clamp(abs(dirX) * lenX, 0.0, 1.0);
	    lenX *= lenX;

	    // Direction and length analysis (vertical).
	    float ec = lE - lC;
	    float ca = lC - lA;
	    float lenY = max(abs(ec), abs(ca));
	    lenY = APrxLoRcpF1(lenY);
	    float dirY = lE - lA;
	    lenY = clamp(abs(dirY) * lenY, 0.0, 1.0);
	    lenY *= lenY;

	    float len = lenX + lenY;
	    vec2 dir = vec2(dirX, dirY);

	    // Normalize direction; zro flags a gradient too small to be trusted.
	    vec2 dir2 = dir * dir;
	    float dirR = dir2.x + dir2.y;
	    bool zro = dirR < (1.0 / 64.0);
	    dirR = APrxLoRsqF1(dirR);

	    #if (FSR_QUIT_EARLY == 1)
	    if (zro) {
	        // Flat area: fall back to the bilinear sample of the center quad.
	        float flatL = lC;
	        #if (FSR_PQ == 1)
	        // The taps were linearized above, so convert back exactly like the
	        // main path does; otherwise flat regions are emitted in the x^4 domain.
	        flatL = FromGamma2(flatL);
	        #endif
	        return vec4(clamp(flatL, 0.0, 1.0), 0.0, 0.0, 1.0);
	    }
	    #else
	    dirR = zro ? 1.0 : dirR;
	    dir.x = zro ? 1.0 : dir.x;
	    #endif
	    dir *= vec2(dirR);

	    // Kernel shape: len in [0, 2] is remapped to [0, 1] and squared, then
	    // drives the anisotropic stretch (len2) and the negative lobe (lob).
	    len = len * 0.5;
	    len *= len;
	    float stretch = (dir.x * dir.x + dir.y * dir.y) * APrxLoRcpF1(max(abs(dir.x), abs(dir.y)));
	    vec2 len2 = vec2(1.0 + (stretch - 1.0) * len, 1.0 + -0.5 * len);
	    float lob = 0.5 + float((1.0 / 4.0 - 0.04) - 0.5) * len;
	    float clp = APrxLoRcpF1(lob);

	    // 12-tap Lanczos accumulation.
	    // rs holds the rotation by dir combined with the per-axis scale len2, so a
	    // tap offset (ox, oy) maps to (ox*rs.x + oy*rs.y, ox*rs.z + oy*rs.w).
	    // ppRot is the mapped sub-pixel offset, subtracted from each mapped tap.
	    vec4 rs = vec4(dir.x, dir.y, -dir.y, dir.x) * len2.xxyy;
	    vec2 ppRot = vec2(pp.x * rs.x + pp.y * rs.y, pp.x * rs.z + pp.y * rs.w);
	    float Sxy = rs.x + rs.y; float Dxy = rs.x - rs.y;
	    float Szw = rs.z + rs.w; float Dzw = rs.z - rs.w;

	    float aC = 0.0;
	    float aW = 0.0;

	    {
	        // Taps b(0,-1), c(1,-1), i(-1,1), j(0,1).
	        vec4 vX = vec4(-rs.y, Dxy, -Dxy, rs.y) - ppRot.x;
	        vec4 vY = vec4(-rs.w, Dzw, -Dzw, rs.w) - ppRot.y;
	        vec4 d2 = vX * vX + vY * vY;
	        FsrEasuTap(aC, aW, d2.x, lob, clp, bL);
	        FsrEasuTap(aC, aW, d2.y, lob, clp, cL);
	        FsrEasuTap(aC, aW, d2.z, lob, clp, iL);
	        FsrEasuTap(aC, aW, d2.w, lob, clp, jL);
	    }
	    {
	        // Taps f(0,0), e(-1,0), k(1,1), l(2,1).
	        vec4 vX = vec4(0.0, -rs.x, Sxy, rs.x + Sxy) - ppRot.x;
	        vec4 vY = vec4(0.0, -rs.z, Szw, rs.z + Szw) - ppRot.y;
	        vec4 d2 = vX * vX + vY * vY;
	        FsrEasuTap(aC, aW, d2.x, lob, clp, fL);
	        FsrEasuTap(aC, aW, d2.y, lob, clp, eL);
	        FsrEasuTap(aC, aW, d2.z, lob, clp, kL);
	        FsrEasuTap(aC, aW, d2.w, lob, clp, lL);
	    }
	    {
	        // Taps h(2,0), g(1,0), o(1,2), n(0,2).
	        vec4 vX = vec4(2.0 * rs.x, rs.x, rs.x + 2.0 * rs.y, 2.0 * rs.y) - ppRot.x;
	        vec4 vY = vec4(2.0 * rs.z, rs.z, rs.z + 2.0 * rs.w, 2.0 * rs.w) - ppRot.y;
	        vec4 d2 = vX * vX + vY * vY;
	        FsrEasuTap(aC, aW, d2.x, lob, clp, hL);
	        FsrEasuTap(aC, aW, d2.y, lob, clp, gL);
	        FsrEasuTap(aC, aW, d2.z, lob, clp, oL);
	        FsrEasuTap(aC, aW, d2.w, lob, clp, nL);
	    }

	    // Normalize; the small epsilon keeps the division finite if aW is 0.
	    float easu = aC / (aW + 1.0e-5);

	    // Deringing: limit the result to the range of the center quad f/g/j/k.
	    #if (FSR_DERING == 1)
	    float mn1 = min(AMin3F1(fL, gL, jL), kL);
	    float mx1 = max(AMax3F1(fL, gL, jL), kL);
	    easu = clamp(easu, mn1, mx1);
	    #endif
	    easu = clamp(easu, 0.0, 1.0);

	    // RCAS (source-domain ring: lA, lB, lD, lE)
	    float mn_ring = min(AMin3F1(lA, lB, lD), lE);
	    float mx_ring = max(AMax3F1(lA, lB, lD), lE);

	    // Lobe limits derived from the headroom towards 0 (hitMin) and 1 (hitMax).
	    float hitMinL = min(mn_ring, easu) / (4.0 * mx_ring);
	    float hitMaxL = (1.0 - max(mx_ring, easu)) / (4.0 * mn_ring - 4.0);
	    float lobeL = max(-hitMinL, hitMaxL);
	    // Lobe is kept in [-FSR_RCAS_LIMIT, 0] and attenuated by 2^-SHARPNESS.
	    float rcas_lobe = max(float(-FSR_RCAS_LIMIT), min(lobeL, 0.0)) * exp2(-clamp(float(SHARPNESS), 0.0, 2.0));

	    #if (FSR_RCAS_DENOISE == 1)
	    // Scale the lobe down (to as little as half) where the center deviates
	    // strongly from the ring average relative to the local range.
	    float nz = 0.25 * (lA + lB + lD + lE) - easu;
	    nz = clamp(abs(nz) * APrxMedRcpF1(
	        AMax3F1(AMax3F1(lA, lB, easu), lD, lE) -
	        AMin3F1(AMin3F1(lA, lB, easu), lD, lE)), 0.0, 1.0);
	    nz = -0.5 * nz + 1.0;
	    rcas_lobe *= nz;
	    #endif

	    // (easu + lobe * ring_sum) / (1 + 4 * lobe)
	    float rcpL = APrxMedRcpF1(4.0 * rcas_lobe + 1.0);
	    float res = (rcas_lobe * (lA + lB + lD + lE) + easu) * rcpL;

	    #if (FSR_PQ == 1)
	    res = FromGamma2(res);
	    #endif

	    return vec4(clamp(res, 0.0, 1.0), 0.0, 0.0, 1.0);
	}
No results found