greggman · September 1, 2025 06:55
diff --git a/README.md b/README.md
diff --git a/index.css b/index.css
 /* Give the root elements the full available height. */
 html, body {
  height: 100%;
 }
diff --git a/index.html b/index.html
 <input id="file" type="file">
 <pre id="out"></pre>
diff --git a/index.js b/index.js
 // --- Minimal MP4 (ISO-BMFF) metadata parser in plain JS ---
 // Reads only box headers and small metadata boxes. Skips media data (mdat).

 // Shared decoder used by ascii() to turn raw bytes (such as the four-character
 // box types) into strings.
 // NOTE: in the WHATWG Encoding Standard the "ascii" label selects windows-1252,
 // so bytes >= 0x80 still decode to a character instead of failing.
 const textDecoder = new TextDecoder("ascii");

 // Read an unsigned 16-bit big-endian integer from `view` at byte offset `off`.
 function be16(view, off) {
  const littleEndian = false;
  return view.getUint16(off, littleEndian);
 }
 // Read an unsigned 32-bit big-endian integer from `view` at byte offset `off`.
 function be32(view, off) {
  const littleEndian = false;
  return view.getUint32(off, littleEndian);
 }
 // Read an unsigned 64-bit big-endian integer from `view` at byte offset `off`.
 // Always returns a BigInt.
 function be64(view, off) {
  const hasNativeReader = typeof view.getBigUint64 === "function";
  if (!hasNativeReader) {
    // Older browsers: stitch the value together from two 32-bit halves.
    const upper = BigInt(view.getUint32(off, false));
    const lower = BigInt(view.getUint32(off + 4, false));
    return (upper << 32n) | lower;
  }
  return view.getBigUint64(off, false);
 }
 // Decode `len` bytes of `view`, starting at byte offset `off`, into a string
 // using the shared textDecoder.
 function ascii(view, off, len) {
  // The view may start part-way into its buffer, so add its own offset.
  const begin = view.byteOffset + off;
  const bytes = new Uint8Array(view.buffer, begin, len);
  return textDecoder.decode(bytes);
 }
 // Read up to `len` bytes of `blob` starting at offset `startBig` (a BigInt or
 // Number) and resolve to a DataView over just those bytes. The view is shorter
 // than `len` when the blob ends first.
 async function readRange(blob, startBig, len) {
  const from = Number(startBig);
  const bytes = await blob.slice(from, from + len).arrayBuffer();
  return new DataView(bytes);
 }

 // Read and decode the header of the box that starts at byte offset `start`.
 //
 // blob  - Blob/File holding the ISO-BMFF data.
 // start - BigInt offset of the first byte of the box.
 //
 // Resolves to { type, size, headerBytes, start, contentStart } (size, start and
 // contentStart are BigInts, headerBytes is a Number), or to null when no
 // complete, plausible header can be read at `start`.
 async function readBoxHeader(blob, start) {
  const fileEnd = BigInt(blob.size);
  if (start + 8n > fileEnd) return null;

  // Read the minimal 8-byte header first: 32-bit size, then the 4-char type.
  let dv = await readRange(blob, start, 8);
  let size = BigInt(be32(dv, 0));
  const type = ascii(dv, 4, 4);

  let headerBytes = 8;

  // A 32-bit size of 1 means the real size is a 64-bit field after the type.
  if (size === 1n) {
    // A truncated file can end inside the 16-byte extended header. Report
    // "no header" instead of letting be64() throw a RangeError on a short view.
    if (start + 16n > fileEnd) return null;
    dv = await readRange(blob, start, 16);
    size = be64(dv, 8);
    headerBytes = 16;
  }

  // UUID box has extra 16 bytes in the header
  if (type === "uuid") {
    headerBytes += 16;
  }

  // size 0 => box extends to EOF (top-level only)
  if (size === 0n) size = fileEnd - start;

  // Basic sanity: size must include header and be positive
  if (size < BigInt(headerBytes)) return null;

  return {
    type,
    size,                 // total box size (including header)
    headerBytes,
    start,                // offset where the box starts
    contentStart: start + BigInt(headerBytes),
  };
 }

 // Async generator over the consecutive box headers found in [start, end).
 // Yields every header returned by readBoxHeader() and stops when fewer than
 // 8 bytes remain before `end`, when no header can be read, or when a box
 // would not move the cursor forward.
 async function* iterateBoxes(blob, start = 0n, end = BigInt(blob.size)) {
  for (let cursor = start; cursor + 8n <= end; ) {
    const header = await readBoxHeader(blob, cursor);
    if (!header) return;
    yield header;
    const following = cursor + header.size;
    // A non-advancing box means corrupt data; stop instead of looping forever.
    if (following <= cursor) return;
    cursor = following;
  }
 }

 // Async generator over the boxes nested directly inside `parent`, i.e. the
 // range from the end of its header to the end of the box.
 async function* iterateChildBoxes(blob, parent) {
  const { contentStart, start, size } = parent;
  yield* iterateBoxes(blob, contentStart, start + size);
 }

 // ---- Parsers for a few common metadata boxes ----

 // Parse a file type ('ftyp') box.
 // Resolves to { majorBrand, minorVersion, compatibleBrands }.
 async function parseFtyp(blob, box) {
  const payloadLength = Number(box.size - BigInt(box.headerBytes));
  const dv = await readRange(blob, box.contentStart, payloadLength);
  const majorBrand = ascii(dv, 0, 4);
  const minorVersion = be32(dv, 4);
  // Everything after the first 8 bytes is a list of 4-character brands; a
  // trailing partial entry is ignored.
  const compatibleBrands = [];
  let cursor = 8;
  while (cursor + 4 <= dv.byteLength) {
    compatibleBrands.push(ascii(dv, cursor, 4));
    cursor += 4;
  }
  return { majorBrand, minorVersion, compatibleBrands };
 }

 // Parse a movie header ('mvhd') box.
 // Resolves to { timescale, duration, durationSec }, where duration is in
 // timescale units and durationSec is duration / timescale.
 async function parseMvhd(blob, box) {
  const payloadLength = Number(box.size - BigInt(box.headerBytes));
  const dv = await readRange(blob, box.contentStart, payloadLength);
  const isVersion1 = dv.getUint8(0) === 1;
  // Skip version(1) + flags(3), then the creation/modification times, which
  // are 8 bytes each in version 1 and 4 bytes each otherwise.
  const timescaleAt = 4 + (isVersion1 ? 16 : 8);
  const timescale = be32(dv, timescaleAt);
  // The duration is a 64-bit field in version 1 and a 32-bit field otherwise.
  const duration = isVersion1
    ? Number(be64(dv, timescaleAt + 4))
    : be32(dv, timescaleAt + 4);
  return { timescale, duration, durationSec: duration / timescale };
 }

 // Parse a track header ('tkhd') box.
 // Resolves to { width, height }, converted from 16.16 fixed point to Numbers.
 async function parseTkhd(blob, box) {
  const payloadLength = Number(box.size - BigInt(box.headerBytes));
  const dv = await readRange(blob, box.contentStart, payloadLength);

  // creation + modification + trackID(4) + reserved(4) + duration; the three
  // time fields are 8 bytes each in version 1 and 4 bytes each otherwise.
  const timeFieldBytes = dv.getUint8(0) === 1
    ? 8 + 8 + 4 + 4 + 8
    : 4 + 4 + 4 + 4 + 4;

  const widthAt =
    4 +                 // version(1) + flags(3)
    timeFieldBytes +
    8 +                 // reserved
    (2 + 2 + 2 + 2) +   // layer, alternate_group, volume, reserved
    36;                 // matrix (9 * 4 bytes)

  const FIXED_POINT_ONE = 65536; // 1.0 in 16.16 fixed point
  return {
    width: be32(dv, widthAt) / FIXED_POINT_ONE,
    height: be32(dv, widthAt + 4) / FIXED_POINT_ONE,
  };
 }

 // Parse a media header ('mdhd') box.
 // Resolves to { timescale, duration, language }; language is the low 15 bits
 // of the 16-bit field that follows the duration, returned as a raw Number.
 async function parseMdhd(blob, box) {
  const payloadLength = Number(box.size - BigInt(box.headerBytes));
  const dv = await readRange(blob, box.contentStart, payloadLength);
  const isVersion1 = dv.getUint8(0) === 1;
  // Skip version(1) + flags(3), then the creation/modification times, which
  // are 8 bytes each in version 1 and 4 bytes each otherwise.
  const timescaleAt = 4 + (isVersion1 ? 16 : 8);
  const durationAt = timescaleAt + 4;
  // The duration is a 64-bit field in version 1 and a 32-bit field otherwise.
  const languageAt = durationAt + (isVersion1 ? 8 : 4);

  const timescale = be32(dv, timescaleAt);
  const duration = isVersion1 ? Number(be64(dv, durationAt)) : be32(dv, durationAt);
  const language = be16(dv, languageAt) & 0x7FFF;
  return { timescale, duration, language };
 }

 // Parse a handler reference ('hdlr') box.
 // Resolves to { handlerType }, a 4-character code such as 'vide', 'soun' or
 // 'meta'.
 async function parseHdlr(blob, box) {
  const payloadLength = Number(box.size - BigInt(box.headerBytes));
  const dv = await readRange(blob, box.contentStart, payloadLength);
  // handler_type is the 4-byte field at payload offset 8, right after
  // pre_defined(4) and the 4 bytes that precede it.
  const HANDLER_TYPE_OFFSET = 8;
  return { handlerType: ascii(dv, HANDLER_TYPE_OFFSET, 4) };
 }

 // ---- Main entry: collect basic metadata ----

 // Walk the box tree of `blob` and collect basic metadata.
 //
 // Resolves to an object with:
 //   tracks      - one entry per moov/trak box, filled with width/height (from
 //                 tkhd), timescale/duration (from mdia/mdhd) and handler
 //                 (from mdia/hdlr) for whichever of those boxes are present
 //   brands      - result of parseFtyp(), set only when an 'ftyp' box exists
 //   durationSec - movie duration from moov/mvhd, set only when present
 //
 // All other boxes (including 'mdat') are skipped using their header size.
 async function parseMp4Metadata(blob) {
  const result = { tracks: [] };

  for await (const top of iterateBoxes(blob)) {
    if (top.type === "ftyp") {
      result.brands = await parseFtyp(blob, top);
      continue;
    }
    if (top.type !== "moov") continue;

    for await (const moovChild of iterateChildBoxes(blob, top)) {
      if (moovChild.type === "mvhd") {
        const { durationSec } = await parseMvhd(blob, moovChild);
        result.durationSec = durationSec;
        continue;
      }
      if (moovChild.type !== "trak") continue;

      const track = {};
      for await (const trakChild of iterateChildBoxes(blob, moovChild)) {
        if (trakChild.type === "tkhd") {
          Object.assign(track, await parseTkhd(blob, trakChild));
          continue;
        }
        if (trakChild.type !== "mdia") continue;

        for await (const mdiaChild of iterateChildBoxes(blob, trakChild)) {
          switch (mdiaChild.type) {
            case "mdhd": {
              const { timescale, duration } = await parseMdhd(blob, mdiaChild);
              track.timescale = timescale;
              track.duration = duration;
              break;
            }
            case "hdlr": {
              const { handlerType } = await parseHdlr(blob, mdiaChild);
              track.handler = handlerType; // 'vide'/'soun'/...
              break;
            }
            default:
              break;
          }
        }
      }
      result.tracks.push(track);
    }
  }

  return result;
 }

 // ---- Helper to pretty-print top-level boxes (optional) ----
 // Collect a JSON-friendly summary of every top-level box: its type plus its
 // size and start offset rendered as decimal strings.
 async function listTopLevelBoxes(blob) {
  const summaries = [];
  for await (const { type, size, start } of iterateBoxes(blob)) {
    summaries.push({ type, size: size.toString(), start: start.toString() });
  }
  return summaries;
 }

 // ---- Example usage with a file input ----
 // <input id="file" type="file" accept="video/mp4,video/*" />
 // <pre id="out"></pre>
 {
  const input = document.getElementById("file");
  const out = document.getElementById("out");
  // Parse the newly selected file and show the result as JSON in #out.
  input.addEventListener("change", async () => {
    const file = input.files?.[0];
    if (!file) return; // nothing selected
    try {
      const meta = await parseMp4Metadata(file);
      const tl = await listTopLevelBoxes(file);
      out.textContent = JSON.stringify({ topLevel: tl, metadata: meta }, null, 2);
    } catch (err) {
      // The parsers read fixed offsets and can throw on malformed or truncated
      // files. Report that on the page instead of leaving an unhandled promise
      // rejection and a stale output area.
      console.error(err);
      out.textContent = `Error: ${err instanceof Error ? err.message : String(err)}`;
    }
  });
 }

 // ---- Example usage with fetch ----
 // (async () => {
 //   const blob = await (await fetch("some.mp4")).blob();
 //   console.log(await parseMp4Metadata(blob));
 // })();
diff --git a/jsGist.json b/jsGist.json
 {"name":"mp4 parser - try 01","settings":{},"filenames":["index.html","index.css","index.js"]}
	// --- Minimal MP4 (ISO-BMFF) metadata parser in plain JS ---
	// Reads only box headers and small metadata boxes. Skips media data (mdat).

	// Shared decoder used by ascii() to turn raw bytes (such as the four-character
	// box types) into strings.
	// NOTE: in the WHATWG Encoding Standard the "ascii" label selects windows-1252,
	// so bytes >= 0x80 still decode to a character instead of failing.
	const textDecoder = new TextDecoder("ascii");

	// Read an unsigned 16-bit big-endian integer from `view` at byte offset `off`.
	function be16(view, off) {
	  const littleEndian = false;
	  return view.getUint16(off, littleEndian);
	}
	// Read an unsigned 32-bit big-endian integer from `view` at byte offset `off`.
	function be32(view, off) {
	  const littleEndian = false;
	  return view.getUint32(off, littleEndian);
	}
	// Read an unsigned 64-bit big-endian integer from `view` at byte offset `off`.
	// Always returns a BigInt.
	function be64(view, off) {
	  const hasNativeReader = typeof view.getBigUint64 === "function";
	  if (!hasNativeReader) {
	    // Older browsers: stitch the value together from two 32-bit halves.
	    const upper = BigInt(view.getUint32(off, false));
	    const lower = BigInt(view.getUint32(off + 4, false));
	    return (upper << 32n) | lower;
	  }
	  return view.getBigUint64(off, false);
	}
	// Decode `len` bytes of `view`, starting at byte offset `off`, into a string
	// using the shared textDecoder.
	function ascii(view, off, len) {
	  // The view may start part-way into its buffer, so add its own offset.
	  const begin = view.byteOffset + off;
	  const bytes = new Uint8Array(view.buffer, begin, len);
	  return textDecoder.decode(bytes);
	}
	// Read up to `len` bytes of `blob` starting at offset `startBig` (a BigInt or
	// Number) and resolve to a DataView over just those bytes. The view is shorter
	// than `len` when the blob ends first.
	async function readRange(blob, startBig, len) {
	  const from = Number(startBig);
	  const bytes = await blob.slice(from, from + len).arrayBuffer();
	  return new DataView(bytes);
	}

	// Read and decode the header of the box that starts at byte offset `start`.
	//
	// blob  - Blob/File holding the ISO-BMFF data.
	// start - BigInt offset of the first byte of the box.
	//
	// Resolves to { type, size, headerBytes, start, contentStart } (size, start and
	// contentStart are BigInts, headerBytes is a Number), or to null when no
	// complete, plausible header can be read at `start`.
	async function readBoxHeader(blob, start) {
	  const fileEnd = BigInt(blob.size);
	  if (start + 8n > fileEnd) return null;

	  // Read the minimal 8-byte header first: 32-bit size, then the 4-char type.
	  let dv = await readRange(blob, start, 8);
	  let size = BigInt(be32(dv, 0));
	  const type = ascii(dv, 4, 4);

	  let headerBytes = 8;

	  // A 32-bit size of 1 means the real size is a 64-bit field after the type.
	  if (size === 1n) {
	    // A truncated file can end inside the 16-byte extended header. Report
	    // "no header" instead of letting be64() throw a RangeError on a short view.
	    if (start + 16n > fileEnd) return null;
	    dv = await readRange(blob, start, 16);
	    size = be64(dv, 8);
	    headerBytes = 16;
	  }

	  // UUID box has extra 16 bytes in the header
	  if (type === "uuid") {
	    headerBytes += 16;
	  }

	  // size 0 => box extends to EOF (top-level only)
	  if (size === 0n) size = fileEnd - start;

	  // Basic sanity: size must include header and be positive
	  if (size < BigInt(headerBytes)) return null;

	  return {
	    type,
	    size, // total box size (including header)
	    headerBytes,
	    start, // offset where the box starts
	    contentStart: start + BigInt(headerBytes),
	  };
	}

	// Async generator over the consecutive box headers found in [start, end).
	// Yields every header returned by readBoxHeader() and stops when fewer than
	// 8 bytes remain before `end`, when no header can be read, or when a box
	// would not move the cursor forward.
	async function* iterateBoxes(blob, start = 0n, end = BigInt(blob.size)) {
	  for (let cursor = start; cursor + 8n <= end; ) {
	    const header = await readBoxHeader(blob, cursor);
	    if (!header) return;
	    yield header;
	    const following = cursor + header.size;
	    // A non-advancing box means corrupt data; stop instead of looping forever.
	    if (following <= cursor) return;
	    cursor = following;
	  }
	}

	// Async generator over the boxes nested directly inside `parent`, i.e. the
	// range from the end of its header to the end of the box.
	async function* iterateChildBoxes(blob, parent) {
	  const { contentStart, start, size } = parent;
	  yield* iterateBoxes(blob, contentStart, start + size);
	}

	// ---- Parsers for a few common metadata boxes ----

	// Parse a file type ('ftyp') box.
	// Resolves to { majorBrand, minorVersion, compatibleBrands }.
	async function parseFtyp(blob, box) {
	  const payloadLength = Number(box.size - BigInt(box.headerBytes));
	  const dv = await readRange(blob, box.contentStart, payloadLength);
	  const majorBrand = ascii(dv, 0, 4);
	  const minorVersion = be32(dv, 4);
	  // Everything after the first 8 bytes is a list of 4-character brands; a
	  // trailing partial entry is ignored.
	  const compatibleBrands = [];
	  let cursor = 8;
	  while (cursor + 4 <= dv.byteLength) {
	    compatibleBrands.push(ascii(dv, cursor, 4));
	    cursor += 4;
	  }
	  return { majorBrand, minorVersion, compatibleBrands };
	}

	// Parse a movie header ('mvhd') box.
	// Resolves to { timescale, duration, durationSec }, where duration is in
	// timescale units and durationSec is duration / timescale.
	async function parseMvhd(blob, box) {
	  const payloadLength = Number(box.size - BigInt(box.headerBytes));
	  const dv = await readRange(blob, box.contentStart, payloadLength);
	  const isVersion1 = dv.getUint8(0) === 1;
	  // Skip version(1) + flags(3), then the creation/modification times, which
	  // are 8 bytes each in version 1 and 4 bytes each otherwise.
	  const timescaleAt = 4 + (isVersion1 ? 16 : 8);
	  const timescale = be32(dv, timescaleAt);
	  // The duration is a 64-bit field in version 1 and a 32-bit field otherwise.
	  const duration = isVersion1
	    ? Number(be64(dv, timescaleAt + 4))
	    : be32(dv, timescaleAt + 4);
	  return { timescale, duration, durationSec: duration / timescale };
	}

	// Parse a track header ('tkhd') box.
	// Resolves to { width, height }, converted from 16.16 fixed point to Numbers.
	async function parseTkhd(blob, box) {
	  const payloadLength = Number(box.size - BigInt(box.headerBytes));
	  const dv = await readRange(blob, box.contentStart, payloadLength);

	  // creation + modification + trackID(4) + reserved(4) + duration; the three
	  // time fields are 8 bytes each in version 1 and 4 bytes each otherwise.
	  const timeFieldBytes = dv.getUint8(0) === 1
	    ? 8 + 8 + 4 + 4 + 8
	    : 4 + 4 + 4 + 4 + 4;

	  const widthAt =
	    4 +                 // version(1) + flags(3)
	    timeFieldBytes +
	    8 +                 // reserved
	    (2 + 2 + 2 + 2) +   // layer, alternate_group, volume, reserved
	    36;                 // matrix (9 * 4 bytes)

	  const FIXED_POINT_ONE = 65536; // 1.0 in 16.16 fixed point
	  return {
	    width: be32(dv, widthAt) / FIXED_POINT_ONE,
	    height: be32(dv, widthAt + 4) / FIXED_POINT_ONE,
	  };
	}

	// Parse a media header ('mdhd') box.
	// Resolves to { timescale, duration, language }; language is the low 15 bits
	// of the 16-bit field that follows the duration, returned as a raw Number.
	async function parseMdhd(blob, box) {
	  const payloadLength = Number(box.size - BigInt(box.headerBytes));
	  const dv = await readRange(blob, box.contentStart, payloadLength);
	  const isVersion1 = dv.getUint8(0) === 1;
	  // Skip version(1) + flags(3), then the creation/modification times, which
	  // are 8 bytes each in version 1 and 4 bytes each otherwise.
	  const timescaleAt = 4 + (isVersion1 ? 16 : 8);
	  const durationAt = timescaleAt + 4;
	  // The duration is a 64-bit field in version 1 and a 32-bit field otherwise.
	  const languageAt = durationAt + (isVersion1 ? 8 : 4);

	  const timescale = be32(dv, timescaleAt);
	  const duration = isVersion1 ? Number(be64(dv, durationAt)) : be32(dv, durationAt);
	  const language = be16(dv, languageAt) & 0x7FFF;
	  return { timescale, duration, language };
	}

	// Parse a handler reference ('hdlr') box.
	// Resolves to { handlerType }, a 4-character code such as 'vide', 'soun' or
	// 'meta'.
	async function parseHdlr(blob, box) {
	  const payloadLength = Number(box.size - BigInt(box.headerBytes));
	  const dv = await readRange(blob, box.contentStart, payloadLength);
	  // handler_type is the 4-byte field at payload offset 8, right after
	  // pre_defined(4) and the 4 bytes that precede it.
	  const HANDLER_TYPE_OFFSET = 8;
	  return { handlerType: ascii(dv, HANDLER_TYPE_OFFSET, 4) };
	}

	// ---- Main entry: collect basic metadata ----

	// Walk the box tree of `blob` and collect basic metadata.
	//
	// Resolves to an object with:
	//   tracks      - one entry per moov/trak box, filled with width/height (from
	//                 tkhd), timescale/duration (from mdia/mdhd) and handler
	//                 (from mdia/hdlr) for whichever of those boxes are present
	//   brands      - result of parseFtyp(), set only when an 'ftyp' box exists
	//   durationSec - movie duration from moov/mvhd, set only when present
	//
	// All other boxes (including 'mdat') are skipped using their header size.
	async function parseMp4Metadata(blob) {
	  const result = { tracks: [] };

	  for await (const top of iterateBoxes(blob)) {
	    if (top.type === "ftyp") {
	      result.brands = await parseFtyp(blob, top);
	      continue;
	    }
	    if (top.type !== "moov") continue;

	    for await (const moovChild of iterateChildBoxes(blob, top)) {
	      if (moovChild.type === "mvhd") {
	        const { durationSec } = await parseMvhd(blob, moovChild);
	        result.durationSec = durationSec;
	        continue;
	      }
	      if (moovChild.type !== "trak") continue;

	      const track = {};
	      for await (const trakChild of iterateChildBoxes(blob, moovChild)) {
	        if (trakChild.type === "tkhd") {
	          Object.assign(track, await parseTkhd(blob, trakChild));
	          continue;
	        }
	        if (trakChild.type !== "mdia") continue;

	        for await (const mdiaChild of iterateChildBoxes(blob, trakChild)) {
	          switch (mdiaChild.type) {
	            case "mdhd": {
	              const { timescale, duration } = await parseMdhd(blob, mdiaChild);
	              track.timescale = timescale;
	              track.duration = duration;
	              break;
	            }
	            case "hdlr": {
	              const { handlerType } = await parseHdlr(blob, mdiaChild);
	              track.handler = handlerType; // 'vide'/'soun'/...
	              break;
	            }
	            default:
	              break;
	          }
	        }
	      }
	      result.tracks.push(track);
	    }
	  }

	  return result;
	}

	// ---- Helper to pretty-print top-level boxes (optional) ----
	// Collect a JSON-friendly summary of every top-level box: its type plus its
	// size and start offset rendered as decimal strings.
	async function listTopLevelBoxes(blob) {
	  const summaries = [];
	  for await (const { type, size, start } of iterateBoxes(blob)) {
	    summaries.push({ type, size: size.toString(), start: start.toString() });
	  }
	  return summaries;
	}

	// ---- Example usage with a file input ----
	// <input id="file" type="file" accept="video/mp4,video/*" />
	// <pre id="out"></pre>
	{
	  const input = document.getElementById("file");
	  const out = document.getElementById("out");
	  // Parse the newly selected file and show the result as JSON in #out.
	  input.addEventListener("change", async () => {
	    const file = input.files?.[0];
	    if (!file) return; // nothing selected
	    try {
	      const meta = await parseMp4Metadata(file);
	      const tl = await listTopLevelBoxes(file);
	      out.textContent = JSON.stringify({ topLevel: tl, metadata: meta }, null, 2);
	    } catch (err) {
	      // The parsers read fixed offsets and can throw on malformed or truncated
	      // files. Report that on the page instead of leaving an unhandled promise
	      // rejection and a stale output area.
	      console.error(err);
	      out.textContent = `Error: ${err instanceof Error ? err.message : String(err)}`;
	    }
	  });
	}

	// ---- Example usage with fetch ----
	// (async () => {
	// const blob = await (await fetch("some.mp4")).blob();
	// console.log(await parseMp4Metadata(blob));
	// })();