Skip to content

Instantly share code, notes, and snippets.

@greggman
Created September 1, 2025 06:55
Show Gist options
  • Save greggman/36c9e0ba4b5c20b2be5b895e4394e63f to your computer and use it in GitHub Desktop.
Save greggman/36c9e0ba4b5c20b2be5b895e4394e63f to your computer and use it in GitHub Desktop.
mp4 parser - try 01
/* Give both the root element and the body a height of 100%. */
html, body {
height: 100%;
}
<!-- File picker (#file) whose "change" handler parses the chosen file; the JSON result is written into #out. -->
<input id="file" type="file">
<pre id="out"></pre>
// --- Minimal MP4 (ISO-BMFF) metadata parser in plain JS ---
// Reads only box headers and small metadata boxes. Skips media data (mdat).
// Single shared decoder instance, reused by ascii() for every bytes-to-string conversion.
const textDecoder = new TextDecoder("ascii");
/**
 * Reads an unsigned 16-bit big-endian integer.
 * @param {DataView} view - View to read from.
 * @param {number} off - Byte offset inside the view.
 * @returns {number} The decoded value.
 */
function be16(view, off) {
  const littleEndian = false;
  return view.getUint16(off, littleEndian);
}
/**
 * Reads an unsigned 32-bit big-endian integer.
 * @param {DataView} view - View to read from.
 * @param {number} off - Byte offset inside the view.
 * @returns {number} The decoded value.
 */
function be32(view, off) {
  const littleEndian = false;
  return view.getUint32(off, littleEndian);
}
/**
 * Reads an unsigned 64-bit big-endian integer as a BigInt.
 * @param {DataView} view - View to read from.
 * @param {number} off - Byte offset inside the view.
 * @returns {bigint} The decoded value.
 */
function be64(view, off) {
  if (typeof view.getBigUint64 !== "function") {
    // No native 64-bit accessor: stitch the value together from two 32-bit words.
    const upper = BigInt(view.getUint32(off, false));
    const lower = BigInt(view.getUint32(off + 4, false));
    return upper * 0x100000000n + lower;
  }
  return view.getBigUint64(off, false);
}
/**
 * Decodes `len` bytes of a view into a string using the shared text decoder.
 * @param {DataView} view - View holding the bytes.
 * @param {number} off - Byte offset inside the view (relative to the view, not its buffer).
 * @param {number} len - Number of bytes to decode.
 * @returns {string} The decoded text.
 */
function ascii(view, off, len) {
  const { buffer, byteOffset } = view;
  // The view may start part-way into its buffer, so rebase the offset.
  const bytes = new Uint8Array(buffer, byteOffset + off, len);
  return textDecoder.decode(bytes);
}
/**
 * Reads a byte range of a Blob into memory.
 * @param {Blob} blob - Source blob (a File works too).
 * @param {bigint|number} startBig - Offset of the first byte; converted with Number().
 * @param {number} len - Number of bytes requested.
 * @returns {Promise<DataView>} View over the bytes obtained from the slice.
 */
async function readRange(blob, startBig, len) {
  const begin = Number(startBig);
  const bytes = await blob.slice(begin, begin + len).arrayBuffer();
  return new DataView(bytes);
}
/**
 * Reads the header of the box that starts at `start`.
 *
 * Handles the compact 8-byte header, the 16-byte form with a 64-bit size
 * (32-bit size field equal to 1), the extra 16 bytes carried by "uuid"
 * boxes, and a size of 0 meaning "runs to the end of the file".
 *
 * @param {Blob} blob - File being parsed.
 * @param {bigint} start - Absolute offset of the first byte of the box.
 * @returns {Promise<{type: string, size: bigint, headerBytes: number,
 *   start: bigint, contentStart: bigint} | null>} The header description, or
 *   null when the header does not fit in the file or its size is invalid.
 */
async function readBoxHeader(blob, start) {
  const fileEnd = BigInt(blob.size);
  if (start + 8n > fileEnd) return null;

  // Compact header: 32-bit size followed by the four-character type.
  const compact = await readRange(blob, start, 8);
  let size = BigInt(be32(compact, 0));
  const type = ascii(compact, 4, 4);
  let headerBytes = 8;

  if (size === 1n) {
    // A 64-bit size follows the type. On a truncated file those 8 bytes may
    // be missing; report "no box" instead of letting the read throw.
    if (start + 16n > fileEnd) return null;
    const extension = await readRange(blob, start + 8n, 8);
    size = be64(extension, 0);
    headerBytes = 16;
  }

  // "uuid" boxes carry 16 more header bytes.
  if (type === "uuid") {
    headerBytes += 16;
  }

  // size 0 => box extends to EOF (meaningful for top-level boxes only).
  if (size === 0n) size = fileEnd - start;

  // A box can never be smaller than its own header.
  if (size < BigInt(headerBytes)) return null;

  return {
    type,
    size, // total box size (including header)
    headerBytes,
    start, // offset where the box starts
    contentStart: start + BigInt(headerBytes),
  };
}
/**
 * Walks the sibling boxes found between two offsets, yielding each header.
 * Iteration stops when fewer than 8 bytes remain before `end`, when a header
 * cannot be read, or when a box would not advance the position.
 *
 * @param {Blob} blob - File being parsed.
 * @param {bigint} [start=0n] - Offset of the first box.
 * @param {bigint} [end=BigInt(blob.size)] - Offset just past the last box.
 * @yields {object} Header objects as produced by readBoxHeader().
 */
async function* iterateBoxes(blob, start = 0n, end = BigInt(blob.size)) {
  for (let cursor = start; cursor + 8n <= end; ) {
    const box = await readBoxHeader(blob, cursor);
    if (!box) return;
    yield box;
    const following = cursor + box.size;
    // A box that does not move forward means corrupt data; bail out
    // rather than spin forever.
    if (following <= cursor) return;
    cursor = following;
  }
}
/**
 * Yields the headers of the boxes nested directly inside `parent`.
 * @param {Blob} blob - File being parsed.
 * @param {{start: bigint, size: bigint, contentStart: bigint}} parent -
 *   Header of the containing box.
 * @yields {object} Header objects of the child boxes.
 */
async function* iterateChildBoxes(blob, parent) {
  // Children occupy everything from the end of the parent's header to the
  // end of the parent box.
  const parentEnd = parent.start + parent.size;
  yield* iterateBoxes(blob, parent.contentStart, parentEnd);
}
// ---- Parsers for a few common metadata boxes ----
/**
 * Parses an "ftyp" box.
 * @param {Blob} blob - File being parsed.
 * @param {{contentStart: bigint, size: bigint, headerBytes: number}} box -
 *   Header of the box.
 * @returns {Promise<{majorBrand: string, minorVersion: number,
 *   compatibleBrands: string[]}>} Brand information.
 */
async function parseFtyp(blob, box) {
  const payloadLength = Number(box.size - BigInt(box.headerBytes));
  const dv = await readRange(blob, box.contentStart, payloadLength);
  const majorBrand = ascii(dv, 0, 4);
  const minorVersion = be32(dv, 4);
  // After the first 8 bytes the payload is a list of 4-byte brand codes;
  // a trailing partial entry is ignored.
  const brandCount = Math.floor((dv.byteLength - 8) / 4);
  const compatibleBrands = Array.from({ length: brandCount }, (_, i) => ascii(dv, 8 + i * 4, 4));
  return { majorBrand, minorVersion, compatibleBrands };
}
/**
 * Parses an "mvhd" box.
 * @param {Blob} blob - File being parsed.
 * @param {{contentStart: bigint, size: bigint, headerBytes: number}} box -
 *   Header of the box.
 * @returns {Promise<{timescale: number, duration: number, durationSec: number}>}
 *   Timescale, duration in timescale units, and duration divided by timescale.
 */
async function parseMvhd(blob, box) {
  const payloadLength = Number(box.size - BigInt(box.headerBytes));
  const dv = await readRange(blob, box.contentStart, payloadLength);
  const isVersion1 = dv.getUint8(0) === 1;
  // Skip version(1) + flags(3), then the creation/modification times,
  // which are 8 bytes each in version 1 and 4 bytes each otherwise.
  const timescaleOffset = 4 + (isVersion1 ? 16 : 8);
  const timescale = be32(dv, timescaleOffset);
  // The duration field directly follows: 64-bit in version 1, 32-bit otherwise.
  const duration = isVersion1
    ? Number(be64(dv, timescaleOffset + 4))
    : be32(dv, timescaleOffset + 4);
  return { timescale, duration, durationSec: duration / timescale };
}
/**
 * Parses a "tkhd" box and extracts the track dimensions.
 * @param {Blob} blob - File being parsed.
 * @param {{contentStart: bigint, size: bigint, headerBytes: number}} box -
 *   Header of the box.
 * @returns {Promise<{width: number, height: number}>} Width and height,
 *   converted from 16.16 fixed point.
 */
async function parseTkhd(blob, box) {
  const payloadLength = Number(box.size - BigInt(box.headerBytes));
  const dv = await readRange(blob, box.contentStart, payloadLength);
  const isVersion1 = dv.getUint8(0) === 1;

  const VERSION_AND_FLAGS = 4;
  // creation + modification + trackID(4) + reserved(4) + duration;
  // the three time fields are 8 bytes in version 1 and 4 bytes otherwise.
  const timingFields = isVersion1 ? 8 + 8 + 4 + 4 + 8 : 4 + 4 + 4 + 4 + 4;
  const RESERVED = 8;
  const LAYER_GROUP_VOLUME = 2 + 2 + 2 + 2; // layer, alternate_group, volume, reserved
  const MATRIX = 9 * 4;
  const widthOffset = VERSION_AND_FLAGS + timingFields + RESERVED + LAYER_GROUP_VOLUME + MATRIX;

  const FIXED_ONE = 65536; // 16.16 fixed-point scale
  const width = be32(dv, widthOffset) / FIXED_ONE;
  const height = be32(dv, widthOffset + 4) / FIXED_ONE;
  return { width, height };
}
/**
 * Parses an "mdhd" box.
 * @param {Blob} blob - File being parsed.
 * @param {{contentStart: bigint, size: bigint, headerBytes: number}} box -
 *   Header of the box.
 * @returns {Promise<{timescale: number, duration: number, language: number}>}
 *   Timescale, duration in timescale units, and the low 15 bits of the
 *   16-bit word that follows the duration.
 */
async function parseMdhd(blob, box) {
  const payloadLength = Number(box.size - BigInt(box.headerBytes));
  const dv = await readRange(blob, box.contentStart, payloadLength);
  const isVersion1 = dv.getUint8(0) === 1;
  // Skip version(1) + flags(3), then the creation/modification times,
  // which are 8 bytes each in version 1 and 4 bytes each otherwise.
  const timescaleOffset = 4 + (isVersion1 ? 16 : 8);
  const durationOffset = timescaleOffset + 4;
  const languageOffset = durationOffset + (isVersion1 ? 8 : 4);

  const timescale = be32(dv, timescaleOffset);
  const duration = isVersion1 ? Number(be64(dv, durationOffset)) : be32(dv, durationOffset);
  const language = be16(dv, languageOffset) & 0x7fff;
  return { timescale, duration, language };
}
/**
 * Parses an "hdlr" box and returns its handler type.
 * @param {Blob} blob - File being parsed.
 * @param {{contentStart: bigint, size: bigint, headerBytes: number}} box -
 *   Header of the box.
 * @returns {Promise<{handlerType: string}>} Four-character handler type,
 *   e.g. 'vide', 'soun', 'meta'.
 */
async function parseHdlr(blob, box) {
  const payloadLength = Number(box.size - BigInt(box.headerBytes));
  const dv = await readRange(blob, box.contentStart, payloadLength);
  // Payload layout: version/flags(4), pre_defined(4), handler_type(4), ...
  const HANDLER_TYPE_OFFSET = 4 + 4;
  return { handlerType: ascii(dv, HANDLER_TYPE_OFFSET, 4) };
}
// ---- Main entry: collect basic metadata ----
/**
 * Collects basic metadata from an MP4 file by walking its box tree.
 *
 * Only "ftyp" and "moov" are descended into or parsed at the top level;
 * every other box (including 'mdat') is stepped over using its header size.
 *
 * @param {Blob} blob - File to inspect.
 * @returns {Promise<{tracks: object[], brands?: object, durationSec?: number}>}
 *   `brands` is the parsed "ftyp" box, `durationSec` comes from "mvhd", and
 *   each track may carry width/height (from "tkhd"), timescale/duration
 *   (from "mdhd") and handler (from "hdlr").
 */
async function parseMp4Metadata(blob) {
  const result = { tracks: [] };

  // Fills timescale/duration/handler on `track` from the children of an "mdia" box.
  const readMedia = async (mdia, track) => {
    for await (const child of iterateChildBoxes(blob, mdia)) {
      switch (child.type) {
        case "mdhd": {
          const { timescale, duration } = await parseMdhd(blob, child);
          track.timescale = timescale;
          track.duration = duration;
          break;
        }
        case "hdlr": {
          const { handlerType } = await parseHdlr(blob, child);
          track.handler = handlerType; // 'vide'/'soun'/...
          break;
        }
        default:
          break;
      }
    }
  };

  // Builds one track record from the children of a "trak" box.
  const readTrack = async (trak) => {
    const track = {};
    for await (const child of iterateChildBoxes(blob, trak)) {
      if (child.type === "tkhd") {
        Object.assign(track, await parseTkhd(blob, child));
      } else if (child.type === "mdia") {
        await readMedia(child, track);
      }
    }
    return track;
  };

  // Handles the children of the "moov" box.
  const readMovie = async (moov) => {
    for await (const child of iterateChildBoxes(blob, moov)) {
      if (child.type === "mvhd") {
        const { durationSec } = await parseMvhd(blob, child);
        result.durationSec = durationSec;
      } else if (child.type === "trak") {
        result.tracks.push(await readTrack(child));
      }
    }
  };

  for await (const top of iterateBoxes(blob)) {
    if (top.type === "ftyp") {
      result.brands = await parseFtyp(blob, top);
    } else if (top.type === "moov") {
      await readMovie(top);
    }
  }
  return result;
}
// ---- Helper to pretty-print top-level boxes (optional) ----
/**
 * Lists the top-level boxes of a file in a JSON-friendly form.
 * @param {Blob} blob - File to inspect.
 * @returns {Promise<Array<{type: string, size: string, start: string}>>}
 *   One entry per box; size and start are rendered as decimal strings.
 */
async function listTopLevelBoxes(blob) {
  const boxes = [];
  for await (const { type, size, start } of iterateBoxes(blob)) {
    boxes.push({ type, size: size.toString(), start: start.toString() });
  }
  return boxes;
}
// ---- Example usage with a file input ----
// <input id="file" type="file" accept="video/mp4,video/*" />
// <pre id="out"></pre>
{
  // Wire the #file input to the parser and show the result (or the failure)
  // in #out. The braces keep `input` and `out` out of the script's scope.
  const input = document.getElementById("file");
  const out = document.getElementById("out");
  input.addEventListener("change", async () => {
    const file = input.files && input.files[0];
    if (!file) return;
    try {
      // The two passes are independent of each other, so run them concurrently.
      const [meta, tl] = await Promise.all([
        parseMp4Metadata(file),
        listTopLevelBoxes(file),
      ]);
      out.textContent = JSON.stringify({ topLevel: tl, metadata: meta }, null, 2);
    } catch (err) {
      // Nothing awaits an event listener, so a rejection here would otherwise
      // go unhandled and the page would silently show nothing (or stale output).
      console.error(err);
      const reason = err instanceof Error ? err.message : String(err);
      out.textContent = `Failed to parse file: ${reason}`;
    }
  });
}
// ---- Example usage with fetch ----
// (async () => {
// const blob = await (await fetch("some.mp4")).blob();
// console.log(await parseMp4Metadata(blob));
// })();
{"name":"mp4 parser - try 01","settings":{},"filenames":["index.html","index.css","index.js"]}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment