Skip to content

Instantly share code, notes, and snippets.

@rygorous
rygorous / more_serious_fft.cpp
Created December 10, 2025 06:11
Core for a 2x unrolled radix-2 (not really radix-4) FFT kernel
// The FFT alg used here was designed to be very FMA-friendly, but because we can't assume FMAs are present on
// all target HW and want consistent results everywhere, we're using FMA-less algorithms for this application.
// Notation throughout this file:
//
// Let z = a + bi. Then conj(z) = a - bi.
//
// We can swap the real and imaginary parts of z to yield s(z) = b + ai ("swap").
// Now because
//
@rygorous
rygorous / gist:9aac91598af0a94ab4693210291e1f94
Created December 10, 2025 05:37
Reference r2 FFT code
static size_t const kMaxN = 2048; // This is the largest straight FFT we support
struct complexf
{
float re;
float im;
complexf() {}
complexf(float r) : re(r), im(0.0f) {}
complexf(float r, float i) : re(r), im(i) {}
@rygorous
rygorous / morton3d_fun.cpp
Created July 4, 2025 05:49
Vectorized 3D Morton encoding for no reason
#include <immintrin.h>
#include <stdio.h>
#include <stdint.h>
#include <assert.h>
typedef uint32_t uint32;
typedef __m128i Vec128_U32;
// "Insert" two 0 bits after each of the 11 low bits of x
static uint32 Part1By2(uint32 x)
@rygorous
rygorous / gist:6f96cc21292cc704f53ef77e5b4be519
Created May 22, 2025 02:25
Oodle Texture BC7RD "preserve extremes" constraint validation
//===================================================================
// constraint validation for preserve extremes mode
// Cold part of are_endpoints_permitted: the actual constraint validation
static RADNOINLINE bool are_endpoints_permitted_cold(const BC7BlockState & st, const bc7rd_blockinfo& info)
{
const BC7Flags flags = info.flags;
if (st.mode <= 3)
{
@rygorous
rygorous / gist:52cc2a23a73813d645581046dced27fd
Created May 22, 2025 00:15
Oodle Texture "preserve extremes" rules
{
BC7Flags flags = in_flags;
// Preserve extremes mode.
//
// This mode preserves values of 0 and 255 in the alpha channel exactly. In general,
// we can do this in any one channel, but restricting this to alpha makes the interface
// simpler, is consistent with what we do for BC3, and doesn't seem like a significant
// limitation for the user.
//
uint EvenBitMask = 0x55555555u;
uint HighIndexBit = PackedIndices >> 1;
float NumWeight1 = float(countbits(EvenBitMask & ~HighIndexBit & PackedIndices));
float NumWeight2 = float(countbits(EvenBitMask & HighIndexBit & ~PackedIndices));
float NumWeight3 = float(countbits(EvenBitMask & HighIndexBit & PackedIndices));
// with NV LOP3: 1 shift, 3 LOP3, 3 pop count, 3 int->float = 10 insns total for 16 pixels
// without: 1 shift, 2 NOT, 5 AND, 3 pop count, 3 int->float = 14 insns total for 16 pixels
@rygorous
rygorous / rr_dds.h
Created March 7, 2025 01:45
rr_dds loader/writer
//===================================================
// Oodle2 DDS Tool
// (C) Copyright 1994-2022 Epic Games Tools LLC
//===================================================
#ifndef RR_DDS_INCLUDED
#define RR_DDS_INCLUDED
#include <stdint.h>
#include <stddef.h>
@rygorous
rygorous / main.rs
Created March 2, 2025 04:42
Base64 fixed point test
use bit_set::BitSet;
use std::mem;
// Vanilla RFC 4648
const ALPHABET: &str = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
// URL-safe RFC 4648
//const ALPHABET: &str = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
const COUNT: usize = 1usize << 24; // 3 bytes worth suffices for this test
fn lookup(index: u32) -> u32 {
@rygorous
rygorous / bc7rd_chart_example.html
Created January 2, 2025 09:46
Example run with a BC7 RD plot diff
<!DOCTYPE html>
<html><head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
<style>
body { background: #fff; margin: 10px; font: 12pt Calibri,Arial,Helvetica,sans-serif; }
h1 { font-size: 18pt; margin: .5em 0 .5em 0; }
h2 { font-size: 16pt; margin: .5em 0 .5em 0; }
canvas { margin: auto; }
table { border-spacing: 0; }
table.rdresult tbody tr:nth-child(even) { background: #eee; }
@rygorous
rygorous / rdchart_template_prefix.html
Created January 2, 2025 09:40
RD chart template for Oodle Texture eval
<!DOCTYPE html>
<html>
<head>
<style>
body { background: #fff; margin: 10px; font: 12pt Calibri,Arial,Helvetica,sans-serif; }
h1 { font-size: 18pt; margin: .5em 0 .5em 0; }
h2 { font-size: 16pt; margin: .5em 0 .5em 0; }
canvas { margin: auto; }
table { border-spacing: 0; }
table.rdresult tbody tr:nth-child(even) { background: #eee; }