Created
September 1, 2025 06:56
-
-
Save greggman/cc4b70e04538093497ef2a5a49d11480 to your computer and use it in GitHub Desktop.
mp4 parser - try 02
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Give the root element and the body 100% of their containing block's height. */
html, body { | |
height: 100%; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!-- File picker (id "file") followed by a preformatted element (id "out"). -->
<input id="file" type="file"> | |
<pre id="out"></pre> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ---- Minimal MP4 title/comment extractor (plain JS) ---- | |
// One shared decoder per text encoding the parser needs. Per the WHATWG
// Encoding Standard the "ascii" label selects windows-1252, so single bytes
// above 0x7F (such as 0xA9) still decode to a character.
const [tdAscii, tdUtf8, tdUtf16BE] = ["ascii", "utf-8", "utf-16be"].map(
  (label) => new TextDecoder(label),
);
/**
 * Reads an unsigned 16-bit big-endian integer.
 * @param {DataView} v - View to read from.
 * @param {number} o - Byte offset inside the view.
 * @returns {number} The decoded value.
 */
function be16(v, o) {
  const littleEndian = false;
  return v.getUint16(o, littleEndian);
}
/**
 * Reads an unsigned 32-bit big-endian integer.
 * @param {DataView} v - View to read from.
 * @param {number} o - Byte offset inside the view.
 * @returns {number} The decoded value.
 */
function be32(v, o) {
  const littleEndian = false;
  return v.getUint32(o, littleEndian);
}
function be64(v,o){ | |
if (typeof v.getBigUint64 === "function") return v.getBigUint64(o,false); | |
const hi = BigInt(be32(v,o)), lo = BigInt(be32(v,o+4)); | |
return (hi<<32n)+lo; | |
} | |
/**
 * Decodes `l` bytes of a DataView, starting at offset `o`, with the shared
 * single-byte decoder `tdAscii`.
 * @param {DataView} v - View to read from.
 * @param {number} o - Byte offset inside the view.
 * @param {number} l - Number of bytes to decode.
 * @returns {string} The decoded text.
 */
function ascii(v, o, l) {
  const absoluteStart = v.byteOffset + o;
  const bytes = new Uint8Array(v.buffer, absoluteStart, l);
  return tdAscii.decode(bytes);
}
/**
 * Reads `len` bytes of a Blob starting at `startBig` and wraps them in a
 * DataView. Blob#slice clamps to the blob's size, so the returned view may be
 * shorter than `len`.
 * @param {Blob} blob - Source blob (e.g. a File).
 * @param {bigint|number} startBig - Start offset; converted with Number().
 * @param {number} len - Number of bytes requested.
 * @returns {Promise<DataView>} View over the bytes that were read.
 */
async function readRange(blob, startBig, len) {
  const begin = Number(startBig);
  const buffer = await blob.slice(begin, begin + len).arrayBuffer();
  return new DataView(buffer);
}
/**
 * Converts a 32-bit type code into its four-character string, most
 * significant byte first, using the shared `tdAscii` decoder.
 * @param {number} code - 32-bit type code.
 * @returns {string} Four-character code.
 */
function fourCCfromCode(code) {
  const bytes = Uint8Array.of(
    (code >>> 24) & 0xff,
    (code >>> 16) & 0xff,
    (code >>> 8) & 0xff,
    code & 0xff,
  );
  return tdAscii.decode(bytes);
}
/**
 * Reads the header of the box that starts at byte offset `start`.
 *
 * Handles the three size encodings: a plain 32-bit size, a 64-bit size
 * (32-bit size field equal to 1, real size in the following 8 bytes) and
 * size 0, which is resolved to "extends to the end of the blob".
 * A 'uuid' box carries 16 extra header bytes for its extended type.
 *
 * @param {Blob} blob - Source blob.
 * @param {bigint} start - Offset of the first header byte.
 * @returns {Promise<null | {type: string, typeCode: number, size: bigint,
 *   headerBytes: number, start: bigint, contentStart: bigint}>}
 *   The parsed header, or null when the header does not fit in the blob or
 *   the declared size is smaller than the header itself.
 */
async function readBoxHeader(blob, start) {
  const end = BigInt(blob.size);
  if (start + 8n > end) return null;

  const head = await readRange(blob, start, 8);
  let size = BigInt(be32(head, 0));
  const typeCode = be32(head, 4);
  const type = fourCCfromCode(typeCode);
  let headerBytes = 8;

  if (size === 1n) {
    // The 64-bit size must itself fit in the blob; on a truncated file the
    // read would come back short and the 64-bit decode would throw.
    if (start + 16n > end) return null;
    const extended = await readRange(blob, start + 8n, 8);
    size = be64(extended, 0);
    headerBytes = 16;
  }
  if (type === "uuid") headerBytes += 16;
  if (size === 0n) size = end - start; // to EOF (top-level)
  if (size < BigInt(headerBytes)) return null;

  return {
    type,
    typeCode,
    size,
    headerBytes,
    start,
    contentStart: start + BigInt(headerBytes),
  };
}
/**
 * Yields the header of each consecutive box found in [start, end).
 * Iteration stops when fewer than 8 bytes remain, when a header cannot be
 * read, or when a box would not advance the cursor.
 * @param {Blob} blob - Source blob.
 * @param {bigint} [start=0n] - Offset of the first box.
 * @param {bigint} [end=BigInt(blob.size)] - Exclusive end offset.
 * @yields {object} Box header as returned by readBoxHeader.
 */
async function* iterateBoxes(blob, start = 0n, end = BigInt(blob.size)) {
  for (let cursor = start; cursor + 8n <= end; ) {
    const header = await readBoxHeader(blob, cursor);
    if (!header) return;
    yield header;
    const following = cursor + header.size;
    if (following <= cursor) return; // corrupted: the box does not advance
    cursor = following;
  }
}
/**
 * Yields the headers of the boxes nested inside `parent`.
 * @param {Blob} blob - Source blob.
 * @param {{contentStart: bigint, start: bigint, size: bigint}} parent - Header of the containing box.
 * @param {bigint} [contentSkip=0n] - Bytes to skip at the start of the
 *   parent's content before the first child.
 * @yields {object} Child box header.
 */
async function* iterateChildren(blob, parent, contentSkip = 0n) {
  const { contentStart, start, size } = parent;
  yield* iterateBoxes(blob, contentStart + contentSkip, start + size);
}
// ---- Helpers to decode ilst/data/freeform/keys ---- | |
/**
 * Decodes the content of an iTunes/QuickTime 'data' box into a string.
 *
 * The content starts with an 8-byte prefix: a 32-bit type indicator
 * (version byte + 24-bit flags in the "fullbox + reserved" reading, or a
 * plain type in the "type + locale" reading) followed by 4 reserved/locale
 * bytes. Both readings put the text at offset 8 and give the same 32-bit
 * type value, so a single path handles both.
 *
 * Type 2 is decoded as UTF-16BE. Type 1 (UTF-8) and every other type are
 * decoded as UTF-8 on a best-effort basis. Trailing NUL characters are
 * removed.
 *
 * @param {DataView} dv - View over the 'data' box content (header excluded).
 * @returns {string} Decoded text, or "" when the content is too short to
 *   hold the 8-byte prefix.
 */
function decodeIlstDataPayload(dv) {
  const PREFIX_BYTES = 8;
  // Anything shorter than the prefix has no payload; slicing at offset 8
  // would ask for a negative-length array and throw.
  if (dv.byteLength < PREFIX_BYTES) return "";

  const dataType = be32(dv, 0);
  const bytes = new Uint8Array(
    dv.buffer,
    dv.byteOffset + PREFIX_BYTES,
    dv.byteLength - PREFIX_BYTES,
  );
  const decoder = dataType === 2 ? tdUtf16BE : tdUtf8;
  return decoder.decode(bytes).replace(/\0+$/, "");
}
/**
 * Returns the decoded, trimmed text of the first 'data' child of `itemBox`.
 * @param {Blob} blob - Source blob.
 * @param {object} itemBox - Header of the metadata item box.
 * @returns {Promise<string>} The text, or "" when there is no 'data' child.
 */
async function readFirstDataString(blob, itemBox) {
  for await (const child of iterateChildren(blob, itemBox)) {
    if (child.type !== "data") continue;
    const payloadLength = Number(child.size - BigInt(child.headerBytes));
    const payload = await readRange(blob, child.contentStart, payloadLength);
    return decodeIlstDataPayload(payload).trim();
  }
  return "";
}
/**
 * Parses a freeform iTunes '----' item, which contains 'mean', 'name' and
 * one or more 'data' children.
 *
 * 'mean' and 'name' are read as UTF-8 after skipping 4 bytes of
 * version+flags (when at least 4 bytes are present). Only 'data' children
 * seen while no non-empty value has been captured yet are decoded.
 *
 * @param {Blob} blob - Source blob.
 * @param {object} itemBox - Header of the '----' box.
 * @returns {Promise<{mean: string, name: string, value: string}>}
 *   Trimmed strings; each is "" when the corresponding child is missing.
 */
async function parseFreeformItem(blob, itemBox) {
  const result = { mean: "", name: "", value: "" };

  for await (const child of iterateChildren(blob, itemBox)) {
    const isLabel = child.type === "mean" || child.type === "name";
    const isWantedData = !isLabel && child.type === "data" && !result.value;
    if (!isLabel && !isWantedData) continue;

    const payloadLength = Number(child.size - BigInt(child.headerBytes));
    const dv = await readRange(blob, child.contentStart, payloadLength);

    if (isLabel) {
      // These boxes are FullBoxes; skip version+flags (4 bytes) if present.
      const skip = dv.byteLength >= 4 ? 4 : 0;
      const textBytes = new Uint8Array(dv.buffer, dv.byteOffset + skip, dv.byteLength - skip);
      result[child.type] = tdUtf8.decode(textBytes).trim();
    } else {
      result.value = decodeIlstDataPayload(dv).trim();
    }
  }
  return result;
}
/**
 * Parses a QuickTime 'keys' box into a 1-based array of key names.
 *
 * Content layout as read here: 4 bytes version+flags, a 32-bit entry count,
 * then one entry per key: 32-bit entry size (including its own 8-byte
 * prefix), a 4-character namespace (e.g. 'mdta') and the UTF-8 key name.
 * Each result element is "<namespace>:<name>"; index 0 is left unset so the
 * array can be indexed directly by the 1-based key id.
 *
 * Parsing stops at the first entry that is truncated or malformed (size
 * below 8, or running past the end of the box); keys read up to that point
 * are still returned.
 *
 * @param {Blob} blob - Source blob.
 * @param {object} keysBox - Header of the 'keys' box.
 * @returns {Promise<string[]>} Sparse 1-based array of key names; empty when
 *   the box is too short to hold the version/flags and count fields.
 */
async function parseKeysBox(blob, keysBox) {
  const ENTRY_PREFIX = 8; // entry size (4) + namespace (4)
  const contentLength = Number(keysBox.size - BigInt(keysBox.headerBytes));
  const dv = await readRange(blob, keysBox.contentStart, contentLength);
  if (dv.byteLength < 8) return [];

  const count = be32(dv, 4); // bytes 0..3 are version + flags
  const keys = [];
  let off = 8;
  for (let i = 1; i <= count; i++) {
    if (off + ENTRY_PREFIX > dv.byteLength) break;
    const keySize = be32(dv, off);
    // A size below the prefix, or one that overruns the box, cannot be
    // sliced safely (it would throw) and leaves no reliable way to continue.
    if (keySize < ENTRY_PREFIX || off + keySize > dv.byteLength) break;
    const ns = ascii(dv, off + 4, 4); // e.g., 'mdta'
    const nameBytes = new Uint8Array(
      dv.buffer,
      dv.byteOffset + off + ENTRY_PREFIX,
      keySize - ENTRY_PREFIX,
    );
    const val = tdUtf8.decode(nameBytes);
    keys[i] = ns ? `${ns}:${val}` : val;
    off += keySize;
  }
  return keys;
}
/**
 * Scans the children of an 'ilst' box for a title and a comment.
 *
 * Three item flavors are recognized:
 *  (A) the iTunes atoms '©nam' (title) and '©cmt' (comment);
 *  (B) freeform '----' items whose mean/name identify a title or comment;
 *  (C) items whose type contains NUL characters, treated as a numeric index
 *      into `keysMap` (QuickTime 'keys' mapping).
 * Scanning stops as soon as both values are non-empty.
 *
 * @param {Blob} blob - Source blob.
 * @param {object} ilstBox - Header of the 'ilst' box.
 * @param {string[]} [keysMap] - Optional 1-based key names from the 'keys' box.
 * @returns {Promise<{title: ?string, comment: ?string}>} Values found; a
 *   field stays null when no matching item was seen.
 */
async function parseIlstTitleComment(blob, ilstBox, keysMap) {
  const out = { title: null, comment: null };
  const keysAvailable = keysMap && keysMap.length;

  for await (const item of iterateChildren(blob, ilstBox)) {
    const { type } = item;

    if (type === "©nam" && !out.title) {
      // (A) Common iTunes title atom
      out.title = await readFirstDataString(blob, item);
    } else if (type === "©cmt" && !out.comment) {
      // (A) Common iTunes comment atom
      out.comment = await readFirstDataString(blob, item);
    } else if (type === "----") {
      // (B) Freeform item (mean/name/data)
      const { mean, name, value } = await parseFreeformItem(blob, item);
      const lowerName = name.toLowerCase();
      const qualified = `${mean}:${name}`.toLowerCase();
      const isTitle =
        lowerName === "title" ||
        qualified.includes("quicktime:title") ||
        qualified.includes("itunes:title");
      const isComment =
        lowerName === "comment" ||
        qualified.includes("quicktime:comment") ||
        qualified.includes("itunes:comment");
      if (!out.title && isTitle) out.title = value || out.title;
      if (!out.comment && isComment) out.comment = value || out.comment;
    } else if (type.includes("\u0000") && keysAvailable) {
      // (C) Numeric 4CC: look the key name up by its uint32 index
      const keyName = (keysMap[item.typeCode >>> 0] || "").toLowerCase();
      if (keyName) {
        const val = await readFirstDataString(blob, item);
        const isTitle = keyName.endsWith("title") || keyName.includes("quicktime:title");
        const isComment = keyName.endsWith("comment") || keyName.includes("quicktime:comment");
        if (!out.title && isTitle) out.title = val || out.title;
        if (!out.comment && isComment) out.comment = val || out.comment;
      }
    }

    if (out.title && out.comment) break;
  }
  return out;
}
/**
 * Decides how many bytes precede the first child of a 'meta' box.
 *
 * In ISO base media files 'meta' is a FullBox, so 4 bytes of version+flags
 * come first. QuickTime-style 'meta' atoms are plain containers whose first
 * child (normally 'hdlr') starts immediately. The two are told apart by
 * checking whether content bytes 4..7 spell 'hdlr', i.e. whether a child
 * header sits at content offset 0.
 *
 * @param {Blob} blob - Source blob.
 * @param {object} meta - Header of the 'meta' box.
 * @returns {Promise<bigint>} 0n for the QuickTime layout, otherwise 4n.
 */
async function metaChildrenOffset(blob, meta) {
  const FULLBOX_SKIP = 4n;
  const HDLR_CODE = 0x68646c72; // 'hdlr'
  if (meta.size - BigInt(meta.headerBytes) < 8n) return FULLBOX_SKIP;
  const probe = await readRange(blob, meta.contentStart, 8);
  if (probe.byteLength < 8) return FULLBOX_SKIP;
  return be32(probe, 4) === HDLR_CODE ? 0n : FULLBOX_SKIP;
}

/**
 * Finds the title and comment stored in an MP4/QuickTime file.
 *
 * Looks at every top-level 'moov' box, collects the 'meta' boxes found at
 * moov/meta and moov/udta/meta, and for each one parses its 'keys' (if any)
 * and 'ilst' children. Values from earlier 'meta' boxes win; the search ends
 * as soon as both a title and a comment are known.
 *
 * @param {Blob} blob - File or blob to inspect.
 * @returns {Promise<{title: ?string, comment: ?string}>} Values found;
 *   fields may be null when the file carries no such metadata.
 */
async function extractTitleComment(blob) {
  const found = { title: null, comment: null };

  for await (const top of iterateBoxes(blob)) {
    if (top.type !== "moov") continue;

    // Search both moov/meta and moov/udta/meta
    const metaBoxes = [];
    for await (const c of iterateChildren(blob, top)) {
      if (c.type === "meta") metaBoxes.push(c);
      if (c.type === "udta") {
        for await (const u of iterateChildren(blob, c)) {
          if (u.type === "meta") metaBoxes.push(u);
        }
      }
    }

    for (const meta of metaBoxes) {
      // Children start after version+flags for a FullBox 'meta', or right
      // away for a QuickTime-style 'meta'.
      const skip = await metaChildrenOffset(blob, meta);
      let keysMap = [];
      let ilstBox = null;
      for await (const mchild of iterateChildren(blob, meta, skip)) {
        if (mchild.type === "keys") keysMap = await parseKeysBox(blob, mchild);
        if (mchild.type === "ilst") ilstBox = mchild;
      }
      if (!ilstBox) continue;

      const part = await parseIlstTitleComment(blob, ilstBox, keysMap);
      found.title = found.title || part.title;
      found.comment = found.comment || part.comment;
      if (found.title && found.comment) return found;
    }
  }
  return found; // may be nulls if not present
}
// ---- Example: wire up to an <input type="file"> ---- | |
// <input id="file" type="file" accept="video/mp4,video/*" /> | |
// <pre id="out"></pre> | |
// When a file is chosen in #file, extract its title/comment and show the
// result as pretty-printed JSON in #out.
{
  const input = document.getElementById("file");
  const out = document.getElementById("out");
  input.addEventListener("change", async () => {
    const file = input.files?.[0];
    if (!file) return;
    try {
      const result = await extractTitleComment(file);
      out.textContent = JSON.stringify(result, null, 2);
    } catch (err) {
      // Without this the rejection would go unhandled and the previous
      // file's output would stay on screen.
      console.error(err);
      out.textContent = `Failed to read metadata: ${err?.message ?? err}`;
    }
  });
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{"name":"mp4 parser - try 02","settings":{},"filenames":["index.html","index.css","index.js"]} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment