Skip to content

Instantly share code, notes, and snippets.

@signaldust
Created August 24, 2024 16:06
Show Gist options
  • Save signaldust/4feadad310939f7b9f0bac279b88eb48 to your computer and use it in GitHub Desktop.
Save signaldust/4feadad310939f7b9f0bac279b88eb48 to your computer and use it in GitHub Desktop.
Sort of realistic WAV loader
#include "wav.h"
#undef DEBUG_WAVEFORMAT
#include <cstring> // for memcmp
using namespace dust;
// Error messages handed back by waveParse(). They are file-local and
// immutable; callers only ever see them as 'const char *'.

// container-level problems
static const char eNotRIFF[]     = "Unknown file format.";
static const char eUnsupported[] = "Unsupported format.";
static const char eCorrupt[]     = "File appears corrupt.";
static const char eReadFail[]    = "Unexpected end-of-file.";

// chunk-length problems
static const char eFmtLen[]      = "Format header too short.";
static const char eSmplLen[]     = "smpl chunk too short.";

// required chunks that never showed up
static const char eNoFmt[]       = "Could not find a format header";
static const char eNoData[]      = "Could not find a data chunk";
// One entry of a .wav 'cue ' chunk.
//
// The struct is meant to be overlaid directly on file data (see the
// disabled 'cue ' code in waveParse), so it must consist of exactly
// six packed 32-bit fields.
struct WavCuePoint{
    unsigned id;            // unique number
    unsigned position;      // "play order" (actually this seems to have sample offset)
    unsigned refChunk;      // where is wave data (well, we'll ignore this)
    unsigned chunkStart;    // .. probably not safe to use
    unsigned blockStart;    // byte offset into chunk start
    unsigned samplePos;     // sample offset
};
// fail the build rather than silently misparse if the layout ever differs
static_assert(sizeof(WavCuePoint) == 24,
    "WavCuePoint must be six packed 32-bit fields");
// Fixed-size header of a .wav 'smpl' chunk.
//
// waveParse() fread()s this struct straight out of the file, so the
// in-memory layout has to be exactly nine packed 32-bit fields.
// The header is followed in the file by cSampleLoops loop records.
struct WavSamplerChunk {
    unsigned dwManufacturer;
    unsigned dwProduct;
    unsigned dwSamplePeriod;
    unsigned dwMIDIUnityNote;
    unsigned dwMIDIPitchFraction;
    unsigned dwSMPTEFormat;
    unsigned dwSMPTEOffset;
    unsigned cSampleLoops;      // this is the only thing we currently take
    unsigned cbSamplerData;
};
// fail the build rather than silently misparse if the layout ever differs
static_assert(sizeof(WavSamplerChunk) == 36,
    "WavSamplerChunk must be nine packed 32-bit fields");
// One loop record of a .wav 'smpl' chunk.
//
// waveParse() fread()s this struct straight out of the file, so the
// in-memory layout has to be exactly six packed 32-bit fields.
struct WavSampleLoop {
    unsigned dwIdentifier;
    unsigned dwType;
    unsigned dwStart;       // copied to WaveHeader::loop_start
    unsigned dwEnd;         // WaveHeader::loop_end is stored as 1 + dwEnd
    unsigned dwFraction;
    unsigned dwPlayCount;
};
// fail the build rather than silently misparse if the layout ever differs
static_assert(sizeof(WavSampleLoop) == 24,
    "WavSampleLoop must be six packed 32-bit fields");
// Returns true when the raw 32-bit chunk identifier 'id' (exactly as it
// was read from the file) spells the four-character tag 'tag'.
// Compares bytes with memcmp, so the string literal is never accessed
// through a (possibly misaligned) 'unsigned' pointer.
static bool wavTagIs(unsigned id, const char * tag)
{
    return 0 == std::memcmp(&id, tag, 4);
}

// Parses the RIFF/WAVE chunk structure of 'f' into 'head'.
//
// Always seeks to the beginning of the file first, then walks the chunk
// list until a chunk header can no longer be read:
//   'fmt '  fills head->fmt and head->format (required)
//   'data'  records head->dataoffset / head->datasize (required),
//           sample data itself is not loaded here
//   'smpl'  the first loop record sets head->loop, loop_start, loop_end
// Any other chunk is skipped.
//
// head->loop is only ever set to true here, never cleared.
//
// Returns 0 on success, or a pointer to a static error message.
const char * dust::waveParse(FILE * f, WaveHeader * head)
{
    // make sure we are at the beginning
    unsigned offset = 0;
    fseek(f, 0, SEEK_SET);

    {
        unsigned buf[3];    // RIFF + size + WAVE
        if(1 != fread(buf, sizeof(buf), 1, f))
        {
            return eReadFail;
        }
        offset += sizeof(buf);

        if(!wavTagIs(buf[0], "RIFF"))
        {
            return eNotRIFF;
        }
        if(!wavTagIs(buf[2], "WAVE"))
        {
            return eNotRIFF;    // well, technically riff but
        }
    }

    // flags for required chunks
    bool gotFormat = false, gotData = false;

    // parse chunks
    while(true)
    {
        unsigned chunkHead[2];  // type and length
        if(1 != fread(chunkHead, sizeof(chunkHead), 1, f))
        {
            break;  // if we get a read error here, assume end of file?
        }
        // 'offset' now points at the first payload byte of this chunk and
        // must stay there until the "next chunk" step at the bottom
        offset += sizeof(chunkHead);

#ifdef DEBUG_WAVEFORMAT
        fprintf(stderr, "chunk '%c%c%c%c' size %d\n",
            ((char*)chunkHead)[0],
            ((char*)chunkHead)[1],
            ((char*)chunkHead)[2],
            ((char*)chunkHead)[3], chunkHead[1]);
#endif

        if(wavTagIs(chunkHead[0], "fmt "))
        {
            if(chunkHead[1] < sizeof(head->fmt)) return eFmtLen;
            if(1 != fread(&head->fmt, sizeof(head->fmt), 1, f))
            {
                return eReadFail;
            }

            // parse data-format type
            head->format = WaveHeader::WF_UNKNOWN;

#ifdef DEBUG_WAVEFORMAT
            {
                fprintf(stderr, " format: 0x%x, ch: 0x%x, sBytes: %d, sBits: %d\n",
                    head->fmt.format, head->fmt.channels,
                    head->fmt.sampleBytes, head->fmt.sampleBits);
            }
#endif

            // WAVE_FORMAT_EXTENSIBLE .. can encode the above types,
            // so patch them back into the standard data
            if(head->fmt.format == 0xFFFE)
            {
                // 16 bytes of basic header + 24 bytes of extension
                if(chunkHead[1] < 40) return eFmtLen;

                struct {
                    unsigned short cbSize;
                    unsigned short wValidBitsPerSample; // this is "hint only"
                    unsigned dwChannelMask;
                    unsigned char guid[16];
                } ext;

                if(1 != fread(&ext, sizeof(ext), 1, f))
                {
                    return eReadFail;
                }

#ifdef DEBUG_WAVEFORMAT
                {
                    fprintf(stderr, " validBits: %d, chMask: 0x%x,"
                        " guid: 0x%x:0x%x:0x%x:0x%x\n",
                        ext.wValidBitsPerSample, ext.dwChannelMask,
                        *(unsigned*)(ext.guid),
                        *(unsigned*)(ext.guid+4),
                        *(unsigned*)(ext.guid+8),
                        *(unsigned*)(ext.guid+12)
                        );
                }
#endif

                // standard format? (everything after the first two bytes
                // of the GUID matches the fixed tail)
                if(!std::memcmp(ext.guid + 2,
                    "\x00\x00\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71",
                    14))
                {
                    // just copy back the format code, and proceed as usual
                    std::memcpy(&head->fmt.format, ext.guid,
                        sizeof(head->fmt.format));
                }
            }

            // With zero channels every "sampleBytes == N * channels" test
            // below would accept sampleBytes == 0, and the per-frame size
            // is later used as a divisor; reject such headers up front.
            if(head->fmt.channels == 0) return eUnsupported;

            if(head->fmt.format == 1)
            {
                if(head->fmt.sampleBits == 8
                && head->fmt.sampleBytes == 1 * head->fmt.channels)
                    head->format = WaveHeader::WF_PCM8;

                if(head->fmt.sampleBits == 16
                && head->fmt.sampleBytes == 2 * head->fmt.channels)
                    head->format = WaveHeader::WF_PCM16;

                if(head->fmt.sampleBits == 24
                && head->fmt.sampleBytes == 3 * head->fmt.channels)
                    head->format = WaveHeader::WF_PCM24;

                if(head->fmt.sampleBits == 32
                && head->fmt.sampleBytes == 4 * head->fmt.channels)
                    head->format = WaveHeader::WF_PCM32;
            }

            if(head->fmt.format == 3
            && head->fmt.sampleBits == 32
            && head->fmt.sampleBytes == 4 * head->fmt.channels)
                head->format = WaveHeader::WF_FLOAT;

            if(head->format == WaveHeader::WF_UNKNOWN)
                return eUnsupported;

            gotFormat = true;
        }

        if(wavTagIs(chunkHead[0], "data"))
        {
            // data-chunks don't get loaded here
            // we just record the offset for later seek
            head->dataoffset = offset;
            head->datasize = chunkHead[1];
            gotData = true;
        }

#if 0 // copied from old code, port this at some point?
        if(chunkHead[0] == *(unsigned*)"loop")
        {
            // 2x DWORd
            if(offset + 8 > len) GelFail();
            loop = true;
            loop_start = *((unsigned*) (data + offset)); offset += 4;
            loop_end = *((unsigned*) (data + offset)); offset += 4;
            //printf("WAV: parsed loop: %d to %d\n", loop_start, loop_end);
            offset = nextChunk;
        }
        if(chunkHead[0] == *(unsigned*)"cue ")
        {
            if(offset + 4 > len) GelFail();
            unsigned nPoints = *((unsigned*) (data + offset)); offset += 4;
            if(offset + nPoints * sizeof(WavCuePoint) > len) GelFail();
            for(unsigned i = 0; i < nPoints; ++i)
            {
                WavCuePoint * cp = (WavCuePoint*) (data + offset);
                offset += sizeof(WavCuePoint);
                //printf("WAV: cue %d, pos %d, sample offset: %d\n",
                // cp->id, cp->position, cp->samplePos);
            }
            offset = nextChunk;
        }
#endif

        if(wavTagIs(chunkHead[0], "smpl"))
        {
            WavSamplerChunk smpl;
            if(chunkHead[1] < sizeof(smpl)) return eSmplLen;
            if(1 != fread(&smpl, sizeof(smpl), 1, f))
            {
                return eReadFail;
            }

            for(unsigned i = 0; i < smpl.cSampleLoops; ++i)
            {
                WavSampleLoop sloop;

                // the chunk must be long enough for every loop it declares
                const unsigned long long need = sizeof(smpl)
                    + (static_cast<unsigned long long>(i) + 1) * sizeof(sloop);
                if(chunkHead[1] < need) return eSmplLen;

                if(1 != fread(&sloop, sizeof(sloop), 1, f))
                {
                    return eReadFail;
                }

                // NOTE: 'offset' is intentionally NOT advanced here.
                // The loop records are part of this chunk's payload, which
                // is skipped as a whole below; counting them twice would
                // make the seek to the next chunk overshoot.

                if(!head->loop)
                {
                    head->loop = true;
                    head->loop_start = sloop.dwStart;
                    head->loop_end = 1 + sloop.dwEnd;
#ifdef DEBUG_WAVEFORMAT
                    fprintf(stderr, "WAV: parsed smpl loop: %d to %d\n",
                        head->loop_start, head->loop_end);
#endif
                }
                // ignore loops beyond the first one
            }
        }

        // do we have everything we understand?
        //if(gotFormat && gotData) break;

        // next chunk: RIFF chunks start on even offsets, so an odd-sized
        // payload is followed by one pad byte not counted in its length
        const unsigned advance = chunkHead[1] + (chunkHead[1] & 1);

        // a size that wraps the 32-bit offset can't be followed by another
        // chunk we could address; stop instead of seeking backwards
        if(advance < chunkHead[1] || offset + advance < offset) break;

        offset += advance;
        if(0 != fseek(f, static_cast<long>(offset), SEEK_SET)) break;
    }

    if(!gotFormat) return eNoFmt;
    if(!gotData) return eNoData;

    return 0;
}
void dust::waveDecode
(FILE * f, WaveHeader * head, float * out,
unsigned channels, unsigned nsamples, unsigned offset)
{
// stride is the offseting byte-count
unsigned stride = head->fmt.sampleBytes;
// if this is past offset, just return
if(head->datasize < offset * stride) return;
fseek(f, head->dataoffset + offset * stride, SEEK_SET);
// clear output
for(unsigned i = 0; i < nsamples * channels; ++i) out[i] = 0;
// calculate how much we can really load
unsigned sampleMax = head->nsamples() - offset;
if(sampleMax > nsamples) sampleMax = nsamples;
switch(head->format)
{
case WaveHeader::WF_PCM8:
for(unsigned i = 0; i < sampleMax; ++i)
{
// always load all the data to maintain file offsets
for(unsigned c = 0; c < head->fmt.channels; ++c)
{
unsigned char byte;
if(1 != fread(&byte, 1, 1, f)) return;
if( c < channels )
{
// 8-bits is unsigned
out[i*channels + c] = (1.f/0x7f) * (byte - 127);
}
}
}
break;
case WaveHeader::WF_PCM16:
for(unsigned i = 0; i < sampleMax; ++i)
{
// always load all the data to maintain file offsets
for(unsigned c = 0; c < head->fmt.channels; ++c)
{
signed short word;
if(1 != fread(&word, 2, 1, f)) return;
if( c < channels )
{
out[i*channels + c] = (1.f / 0x7fff) * word;
}
}
}
break;
case WaveHeader::WF_PCM24:
for(unsigned i = 0; i < sampleMax; ++i)
{
// always load all the data to maintain file offsets
for(unsigned c = 0; c < head->fmt.channels; ++c)
{
signed quad; // need 4 bytes
if(1 != fread(&quad, 3, 1, f)) return;
if( c < channels )
{
// sign-extend to 32-bits
quad = (quad & 0xffFFff) | ((quad & 0x800000) * (0xff << 1));
out[i*channels + c] = (1.f / 0x7fFFff) * quad;
}
}
}
break;
case WaveHeader::WF_PCM32:
for(unsigned i = 0; i < sampleMax; ++i)
{
// always load all the data to maintain file offsets
for(unsigned c = 0; c < head->fmt.channels; ++c)
{
signed quad; // need 4 bytes
if(1 != fread(&quad, 4, 1, f)) return;
if( c < channels )
{
out[i*channels + c] = (1.f / 0x7fFFffFF) * quad;
}
}
}
break;
case WaveHeader::WF_FLOAT:
for(unsigned i = 0; i < sampleMax; ++i)
{
// always load all the data to maintain file offsets
for(unsigned c = 0; c < head->fmt.channels; ++c)
{
float value;
if(1 != fread(&value, 4, 1, f)) return;
if( c < channels )
{
out[i*channels + c] = value;
}
}
}
break;
default: return;
}
}
#pragma once
#include <stdio.h> // portable ;)
namespace dust
{
    // WAV-file loading is split into two parts.
    //
    // Header-data is stored into a struct.
    // Raw sampledata is decoded separately.

    // This is just a collection of whatever data
    // we want to load from the chunk structure.
    //
    // A default-constructed header is fully zeroed (format WF_UNKNOWN,
    // no loop), so it is safe to inspect even if parsing failed.
    struct WaveHeader
    {
        // wave data formats
        enum Format {
            WF_UNKNOWN,
            WF_PCM8,
            WF_PCM16,
            WF_PCM24,
            WF_PCM32,
            WF_FLOAT
        };

        Format format = WF_UNKNOWN;

        // datachunk..
        unsigned dataoffset = 0;    // offset of data into file
        unsigned datasize = 0;      // length of the data chunk in bytes

        // loop points, only meaningful when 'loop' is true
        bool loop = false;
        unsigned loop_start = 0;
        unsigned loop_end = 0;

        // then wav-format details..
        // this is currently raw "as-is" byte-packed data
        // (read directly from the start of the 'fmt ' chunk)
        struct {
            unsigned short format, channels;
            unsigned samplerate, bytesPerSecond;
            unsigned short sampleBytes, sampleBits;
        } fmt = {};

        // calculated per-channel sample-count
        // (0 when no format has been loaded, i.e. sampleBytes is 0)
        unsigned nsamples() const
        {
            return fmt.sampleBytes ? datasize / fmt.sampleBytes : 0;
        }
    };

    // 'fmt' is filled by reading 16 raw bytes from the file,
    // so its layout must not contain any padding
    static_assert(sizeof(WaveHeader::fmt) == 16,
        "WaveHeader::fmt must match the 16-byte on-disk format header");

    // Parses the RIFF structure into the header struct.
    // Seeks to the beginning automatically.
    //
    // returns 0 on success, error message on failure
    const char * waveParse(FILE *, WaveHeader *);

    // decode data into a buffer.. buffer channel-count is specified
    // and the loaded count is smaller of buffer and file channels
    // offset is into the file only (buffer should be offset by caller)
    void waveDecode(FILE *, WaveHeader *, float * out,
        unsigned channels, unsigned nsamples, unsigned offset = 0);
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment