Skip to content

Instantly share code, notes, and snippets.

@Cdaprod
Last active June 4, 2025 17:12
Show Gist options
  • Save Cdaprod/ff00449ffd01d10808d731c2734206ae to your computer and use it in GitHub Desktop.
Save Cdaprod/ff00449ffd01d10808d731c2734206ae to your computer and use it in GitHub Desktop.
Goes with my xqd smb gist and this one is attempting to automate all metadata
import os
import cv2
import xml.etree.ElementTree as ET
def get_video_technical_info(path):
    """Read basic technical properties of a video file through OpenCV.

    Args:
        path: Path to the video file.

    Returns:
        A dict with the keys "fps" (frames per second as reported by
        OpenCV), "duration" (seconds, derived from frame count / fps, or 0
        when fps is not a positive number), "width" and "height" (pixels).

    NOTE(review): for a file that cannot be opened OpenCV is expected to
    report 0 for every property, which yields an all-zero result — confirm.
    """
    cap = cv2.VideoCapture(path)
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    finally:
        # Always free the decoder handle, even if a property read fails.
        cap.release()

    # `fps > 0` rejects 0, negative values and NaN in one comparison; a plain
    # truthiness test would let NaN/negative fps through and produce a
    # meaningless duration. Negative frame counts are clamped to 0.
    duration = max(frame_count, 0) / fps if fps > 0 else 0
    return {
        "fps": fps,
        "duration": duration,
        "width": width,
        "height": height,
    }
def _read_sample_frame(path, sample_frame):
    """Return the frame used for quality sampling, or None if none is readable.

    Reads up to ``sample_frame`` frames from the start of the video and
    returns the last one that decoded successfully. For clips shorter than
    ``sample_frame`` frames this is the clip's final frame rather than
    nothing at all.
    """
    cap = cv2.VideoCapture(path)
    frame = None
    try:
        for _ in range(sample_frame):
            ok, candidate = cap.read()
            if not ok or candidate is None:
                # End of stream (or decode failure): keep the last good frame
                # instead of overwriting it with the failed read.
                break
            frame = candidate
    finally:
        cap.release()
    return frame


def blur_score(path, sample_frame=10):
    """Estimate sharpness of a video from a single sampled frame.

    Args:
        path: Path to the video file.
        sample_frame: 1-based index of the frame to sample; shorter clips
            fall back to their last readable frame.

    Returns:
        Variance of the Laplacian of the grayscale frame (higher means more
        edge detail), or 0 when no frame could be read.
    """
    frame = _read_sample_frame(path, sample_frame)
    if frame is None:
        return 0
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    return cv2.Laplacian(gray, cv2.CV_64F).var()


def scene_brightness(path, sample_frame=10):
    """Estimate overall brightness of a video from a single sampled frame.

    Args:
        path: Path to the video file.
        sample_frame: 1-based index of the frame to sample; shorter clips
            fall back to their last readable frame.

    Returns:
        Mean of the V (value) channel of the frame converted to HSV, or 0
        when no frame could be read.
    """
    frame = _read_sample_frame(path, sample_frame)
    if frame is None:
        return 0
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    return hsv[..., 2].mean()
def make_description(tech, blur, brightness):
    """Build a one-sentence, human-readable description of a clip.

    Args:
        tech: Mapping with at least "fps", "width" and "height".
        blur: Sharpness score; values above 100 are described as sharp.
        brightness: Brightness value; above 140 is "bright", below 60 is
            "dark", anything else is "neutral lighting".

    Returns:
        The phrases joined by spaces, capitalized, and terminated with
        " stock footage.".
    """
    # "slow motion" is only mentioned for high-frame-rate clips.
    phrases = ["slow motion"] if tech["fps"] > 45 else []
    phrases.append("sharp focus" if blur > 100 else "soft focus")
    phrases.append(
        "bright" if brightness > 140
        else "dark" if brightness < 60
        else "neutral lighting"
    )
    phrases.append(f"{tech['width']}x{tech['height']} resolution")

    sentence = " ".join(phrases)
    return f"{sentence.capitalize()} stock footage."
def create_sidecar_xml(video_path, metadata):
    """Write *metadata* as a flat XML sidecar next to the video.

    The sidecar path is *video_path* with its extension replaced by ".xml".
    Every key of *metadata* becomes a child tag of a <video> root element and
    every value is stored as its ``str()`` form. An existing sidecar at that
    path is overwritten.

    Args:
        video_path: Path of the video the sidecar belongs to.
        metadata: Mapping of tag name -> value.
    """
    stem, _extension = os.path.splitext(video_path)
    xml_path = f"{stem}.xml"

    video_node = ET.Element("video")
    for tag, value in metadata.items():
        field = ET.SubElement(video_node, tag)
        field.text = str(value)

    ET.ElementTree(video_node).write(
        xml_path, encoding="utf-8", xml_declaration=True
    )
    print(f"Generated: {xml_path}")
def process_dir(directory):
    """Create a metadata sidecar XML for every .mp4 file directly in *directory*.

    For each clip this collects technical info, a blur score and a brightness
    value, derives a description and a keyword list from them, and writes the
    result next to the video with create_sidecar_xml(). Subfolders are not
    scanned.

    Args:
        directory: Folder containing the .mp4 files.
    """
    # Sorted so the processing (and log) order is stable between runs.
    for entry in sorted(os.listdir(directory)):
        if not entry.lower().endswith(".mp4"):
            continue
        path = os.path.join(directory, entry)
        if not os.path.isfile(path):
            # A folder that merely ends in ".mp4" is not a video.
            continue

        # 1. Extract technical info
        tech = get_video_technical_info(path)
        blur = blur_score(path)
        brightness = scene_brightness(path)

        # 2. Generate description & keywords
        description = make_description(tech, blur, brightness)
        slowmo = tech["fps"] > 45
        resolution = f"{tech['width']}x{tech['height']}"
        # Use the same thresholds as make_description(): only values strictly
        # below 60 are "dark", so a brightness of exactly 60 is labelled
        # "neutral" in both the description and the keywords.
        if brightness > 140:
            lighting = "bright"
        elif brightness < 60:
            lighting = "dark"
        else:
            lighting = "neutral"
        keywords = [
            "stock",
            "footage",
            "slow motion" if slowmo else "normal speed",
            "sharp" if blur > 100 else "soft",
            lighting,
            resolution,
        ]

        # 3. Build metadata dictionary
        metadata = {
            "filename": os.path.basename(path),
            "absolute_path": os.path.abspath(path),
            "description": description,
            "keywords": ", ".join(keywords),
            "fps": tech["fps"],
            "duration": round(tech["duration"], 2),
            "resolution": resolution,
            "blur_score": round(blur, 2),
            "brightness": round(brightness, 2),
            "slow_motion": str(slowmo),
        }

        # 4. Create or update sidecar XML
        create_sidecar_xml(path, metadata)
if __name__ == "__main__":
    # The script works on the current working directory, so launch it from
    # the folder that holds your .mp4 files, e.g.:
    #   cd B:\Video\StockFootage\Batches\well_pump
    process_dir(os.getcwd())
Below is a concrete, working example of how to take the per‐video sidecar XMLs (the output of your autometadata.py/gist) and fold them into a single BlackBox‐compatible metadata XML. In other words:
1. You already have, for each *.mp4, a sidecar *.xml containing fields like:
<video>
<filename>Z7V_1653.MP4</filename>
<absolute_path>B:\Video\StockFootage\Batches\well_pump\Z7V_1653.MP4</absolute_path>
<description>Slow motion sharp focus bright 1920x1080 resolution stock footage.</description>
<keywords>stock, footage, slow motion, sharp, bright, 1920x1080</keywords>
<fps>60.0</fps>
<duration>10.0</duration>
<resolution>1920x1080</resolution>
<blur_score>125.40</blur_score>
<brightness>150.21</brightness>
<slow_motion>True</slow_motion>
</video>
2. BlackBox expects a single XML file listing every clip’s metadata using this schema (one <Video> element per clip) with exactly these tags (matching their dropdown fields exactly):
<BlackBoxMetadata>
<Video>
<FileName>…</FileName>
<Description>…</Description>
<Keywords>…</Keywords>
<Category>…</Category>
<BatchName>…</BatchName>
<Editorial>…</Editorial>
<EditorialText>…</EditorialText>
<EditorialCity>…</EditorialCity>
<EditorialState>…</EditorialState>
<EditorialCountry>…</EditorialCountry>
<EditorialDate>…</EditorialDate>
<Title>…</Title>
<ShootingCountry>…</ShootingCountry>
<ShootingDate>…</ShootingDate>
</Video>
<!-- repeat one <Video> per clip -->
</BlackBoxMetadata>
Below is a Python script you can drop into the same folder (or a parent folder) that will:
• Scan your batch directory (and optionally subfolders) for sidecar XMLs (the ones autometadata.py generated).
• For each sidecar, read its fields, then map/transform into the exact BlackBox tags.
• Write out a single BlackBox_Metadata.xml in the root of that scan.
You will still manually review “Category” or “BatchName” values if you need to adjust them, but field‐mapping and the boilerplate is fully automated.
1) Example folder structure
B:\Video\StockFootage\Batches\well_pump\
├── Z7V_1653.MP4
├── Z7V_1653.xml ← output from autometadata.py
├── Z7V_1654.MP4
├── Z7V_1654.xml
└── generate_blackbox_xml.py ← (the script below)
2) generate_blackbox_xml.py
import os
import glob
import xml.etree.ElementTree as ET
# ───────────────────────────────────────────────────────────────────────────────
# CONFIGURATION: adjust these defaults as needed
# ───────────────────────────────────────────────────────────────────────────────
# Set True to also pick up sidecar XMLs in subfolders; False scans only the
# top level of the folder being processed.
RECURSIVE_SCAN = False
# Category written into every clip's <Category> node.
# Must exactly match one of BlackBox’s dropdown categories (e.g. "Nature", "Business", etc.).
# NOTE(review): an empty value ("") is meant to fall back to "Uncategorized" so
# it can be fixed later — confirm the code that writes <Category> implements it.
DEFAULT_CATEGORY = "Nature"
# BatchName written into every clip's <BatchName> node.
# If left blank (""), the name of the folder containing each sidecar is used.
DEFAULT_BATCHNAME = ""
# Value of the <Editorial> node for every clip, as the string "True" or "False".
# Leave as "False" if none of your clips are editorial; if some are, post‐edit
# those nodes or extend the script to detect it.
DEFAULT_EDITORIAL = "False"
# Default <ShootingCountry> value; leave blank ("") if unknown.
DEFAULT_SHOOTING_COUNTRY = "United States of America (USA)"
# Default <ShootingDate> value (format: MM DD YYYY); leave blank ("") if unknown.
DEFAULT_SHOOTING_DATE = ""
# ───────────────────────────────────────────────────────────────────────────────
# END CONFIGURATION
# ───────────────────────────────────────────────────────────────────────────────
def find_sidecar_xmls(root_dir, recursive=None):
    """
    Return a list of all *.xml files under root_dir (order is unspecified).

    root_dir is treated as a literal path: characters that are special to
    glob ("[", "]", "*", "?") are escaped, so batch folders such as
    "shoot [day 1]" are scanned correctly instead of matching nothing.

    recursive: True walks subdirectories, False looks only in root_dir.
    When omitted (None) the module-level RECURSIVE_SCAN setting is used.
    """
    if recursive is None:
        recursive = RECURSIVE_SCAN
    pattern = "**/*.xml" if recursive else "*.xml"
    # Escape only the directory part; the pattern itself must stay a glob.
    search = os.path.join(glob.escape(root_dir), pattern)
    return glob.glob(search, recursive=recursive)
def parse_sidecar_xml(xml_path):
    """
    Load one sidecar .xml (as written by autometadata.py) into a flat dict.

    Every direct child of the root element contributes one entry: the key is
    the child's tag and the value is its text, with missing text mapped to "".
    For a typical sidecar the result looks like:
        {'filename': 'Z7V_1653.MP4', 'description': '...', 'keywords': '...', ...}
    If a tag occurs more than once, the last occurrence wins.
    """
    video_node = ET.parse(xml_path).getroot()
    return {field.tag: field.text or "" for field in video_node}
def make_blackbox_video_element(sidecar_data, folder_name):
    """
    Create a <Video> element (ElementTree) with the 14 BlackBox tags, using:
    - sidecar_data: dict from parse_sidecar_xml(); only "filename",
      "description" and "keywords" are read, each defaulting to "".
    - folder_name: name of the batch folder (used if DEFAULT_BATCHNAME is blank)

    All other nodes come from the DEFAULT_* configuration values or are left
    empty. Returns the new, unattached element.
    """
    # A blank category falls back to "Uncategorized", as the configuration
    # comments promise; surrounding whitespace is dropped because the value
    # must match a BlackBox dropdown entry exactly.
    category = DEFAULT_CATEGORY.strip() or "Uncategorized"
    batch = DEFAULT_BATCHNAME.strip() or folder_name

    # Tag order matters: it mirrors the BlackBox schema.
    fields = (
        ("FileName", sidecar_data.get("filename", "")),
        ("Description", sidecar_data.get("description", "")),
        ("Keywords", sidecar_data.get("keywords", "")),
        ("Category", category),
        ("BatchName", batch),
        ("Editorial", DEFAULT_EDITORIAL),  # "True" or "False"
        # Editorial details stay blank; fill them in by hand for editorial clips.
        ("EditorialText", ""),
        ("EditorialCity", ""),
        ("EditorialState", ""),
        ("EditorialCountry", ""),
        ("EditorialDate", ""),
        ("Title", ""),  # optional
        ("ShootingCountry", DEFAULT_SHOOTING_COUNTRY),  # optional
        ("ShootingDate", DEFAULT_SHOOTING_DATE),  # optional
    )

    v = ET.Element("Video")
    for tag, text in fields:
        ET.SubElement(v, tag).text = text
    return v
def generate_blackbox_metadata(root_folder, output_filename="BlackBox_Metadata.xml"):
    """
    Scan for all sidecar XMLs under root_folder, parse, and build:
    <BlackBoxMetadata>
    <Video>…</Video>
    <Video>…</Video>
    </BlackBoxMetadata>
    Write to output_filename in root_folder.

    XML files that are not usable sidecars are skipped with a message instead
    of being turned into bogus <Video> entries or aborting the run:
    - the output file itself (left over from a previous run),
    - files that are not well-formed XML,
    - files without a non-empty <filename> field.
    Nothing is written when no usable sidecar is found.
    """
    output_path = os.path.join(root_folder, output_filename)
    output_key = os.path.normcase(os.path.abspath(output_path))

    # Root element
    bb_root = ET.Element("BlackBoxMetadata")
    for xml_path in sorted(find_sidecar_xmls(root_folder)):
        xml_abs = os.path.abspath(xml_path)
        if os.path.normcase(xml_abs) == output_key:
            # A previous run's combined file also matches "*.xml".
            continue
        try:
            sidecar_data = parse_sidecar_xml(xml_path)
        except ET.ParseError as err:
            print(f"Skipping {xml_path}: not well-formed XML ({err}).")
            continue
        if not sidecar_data.get("filename"):
            print(f"Skipping {xml_path}: no <filename> field, not a sidecar.")
            continue
        # Batch folder = immediate parent of the sidecar. The absolute path is
        # used so a relative root such as "." still yields a real folder name.
        parent_folder = os.path.basename(os.path.dirname(xml_abs))
        bb_root.append(make_blackbox_video_element(sidecar_data, parent_folder))

    if len(bb_root) == 0:
        print(f"No sidecar XMLs found under {root_folder}.")
        return

    # Write out the combined BlackBox XML
    tree = ET.ElementTree(bb_root)
    tree.write(output_path, encoding="utf-8", xml_declaration=True)
    print(f"BlackBox metadata file written to:\n {output_path}")
if __name__ == "__main__":
    # The folder that gets scanned is the current working directory, so run
    # the script from the batch folder (or its parent). Point `base_dir` at a
    # specific path instead if you need to.
    base_dir = os.getcwd()
    generate_blackbox_metadata(root_folder=base_dir)
How it works:
1. Configuration block (top of file).
– RECURSIVE_SCAN: set True if you want to pick up sidecar XMLs in subfolders too.
– DEFAULT_CATEGORY: must exactly match one of BlackBox’s dropdowns (e.g. "Nature", "Business", etc.).
– DEFAULT_BATCHNAME: if blank, the script will use the name of the parent folder of each sidecar XML.
– DEFAULT_EDITORIAL: usually "False" for pure stock clips.
– DEFAULT_SHOOTING_COUNTRY & DEFAULT_SHOOTING_DATE: optional, can be blank or your default.
2. find_sidecar_xmls()
– Gathers all *.xml files in base_dir (or recursively if you enabled it).
3. parse_sidecar_xml(xml_path)
– Reads a single sidecar XML (the one your autometadata.py produced).
– Returns a dictionary like {'filename': 'Z7V_1653.MP4', 'description': '...', 'keywords': '...', ...}.
4. make_blackbox_video_element(sidecar_data, folder_name)
– Mints one <Video> element with exactly the 14 nodes that BlackBox requires.
– You see how each <Tag> maps to:
‣ <FileName> … </FileName>
‣ <Description> … </Description>
‣ <Keywords> … </Keywords>
‣ <Category> … </Category>
‣ <BatchName> … </BatchName>
‣ <Editorial> … </Editorial>
‣ <EditorialText> … </EditorialText>
‣ <EditorialCity> … </EditorialCity>
‣ <EditorialState> … </EditorialState>
‣ <EditorialCountry> … </EditorialCountry>
‣ <EditorialDate> … </EditorialDate>
‣ <Title> … </Title>
‣ <ShootingCountry> … </ShootingCountry>
‣ <ShootingDate> … </ShootingDate>
– Fields like <EditorialText> or <EditorialCity> are blank because DEFAULT_EDITORIAL="False". If you later need to mark certain clips as editorial, you can post‐edit these nodes or extend the script to read an “editorial” flag from your sidecar.
5. generate_blackbox_metadata()
– Loops over all sidecar XMLs, parses each into sidecar_data.
– Finds the parent folder name (used as BatchName if you didn’t set a default).
– Appends each <Video> to the single <BlackBoxMetadata> root.
– Writes out BlackBox_Metadata.xml in base_dir.
🔧 How to Use
1. Install prerequisites (if you haven’t already):
pip install opencv-python
(Only autometadata.py needs OpenCV; generate_blackbox_xml.py uses just the Python standard library.)
2. Drop the above generate_blackbox_xml.py into your batch folder. For example:
B:\Video\StockFootage\Batches\well_pump\generate_blackbox_xml.py
3. Open a terminal (PowerShell/CMD) and cd into that folder:
cd "B:\Video\StockFootage\Batches\well_pump"
4. Run:
python generate_blackbox_xml.py
You’ll see output like:
BlackBox metadata file written to:
 B:\Video\StockFootage\Batches\well_pump\BlackBox_Metadata.xml
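5. Open B:\Video\StockFootage\Batches\well_pump\BlackBox_Metadata.xml in any text editor or browser to verify. ElementTree writes the file on a single line without indentation and collapses empty nodes to the short form (e.g. <Title />); reformatted here for readability, it will look like: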
<?xml version='1.0' encoding='utf-8'?>
<BlackBoxMetadata>
<Video>
<FileName>Z7V_1653.MP4</FileName>
<Description>Slow motion sharp focus bright 1920x1080 resolution stock footage.</Description>
<Keywords>stock, footage, slow motion, sharp, bright, 1920x1080</Keywords>
<Category>Nature</Category>
<BatchName>well_pump</BatchName>
<Editorial>False</Editorial>
<EditorialText></EditorialText>
<EditorialCity></EditorialCity>
<EditorialState></EditorialState>
<EditorialCountry></EditorialCountry>
<EditorialDate></EditorialDate>
<Title></Title>
<ShootingCountry>United States of America (USA)</ShootingCountry>
<ShootingDate></ShootingDate>
</Video>
<Video>
<FileName>Z7V_1654.MP4</FileName>
<Description>Soft focus neutral lighting 1920x1080 resolution stock footage.</Description>
<Keywords>stock, footage, normal speed, soft, neutral, 1920x1080</Keywords>
<Category>Nature</Category>
<BatchName>well_pump</BatchName>
<Editorial>False</Editorial>
<EditorialText></EditorialText>
<EditorialCity></EditorialCity>
<EditorialState></EditorialState>
<EditorialCountry></EditorialCountry>
<EditorialDate></EditorialDate>
<Title></Title>
<ShootingCountry>United States of America (USA)</ShootingCountry>
<ShootingDate></ShootingDate>
</Video>
<!-- etc. -->
</BlackBoxMetadata>
6. Submit (or copy/paste) that single BlackBox_Metadata.xml to BlackBox Global’s uploader.
– This satisfies their requirement for a metadata XML (the fields match their dropdowns exactly).
– If they still prefer .xlsx, you can easily convert this XML to XLSX (or use a similar Python‐Pandas approach), but often BlackBox will accept either.
🚀 Extending Further
• Add Object/Scene Labels (YOLO/CLIP): In your autometadata.py, append detected labels into <keywords> so that generate_blackbox_xml.py picks them up automatically.
• Auto‐detect “Editorial”: If you have a logic (e.g. “if any face is detected → editorial=True”), write that into sidecar, and then let parse_sidecar_xml() pass it through.
• BatchName from Folder Structure: We already use the folder name as fallback. If you have multiple nested levels (e.g. …\farm\well_pump\), you could split os.path.dirname(xml_path) and pick the last two folder names, etc.
• XLSX Output: If you need a .xlsx in addition to the XML, convert the written file with Pandas after the final write step (requires pip install pandas openpyxl):
import pandas as pd
def xml_to_xlsx(xml_path, xlsx_path):
    """Convert a combined BlackBox metadata XML into an Excel workbook.

    Each <Video> element directly under the root becomes one row; each of its
    child tags becomes a column holding that child's text ("" when empty).

    Args:
        xml_path: Path of the XML file to read.
        xlsx_path: Path of the .xlsx file to write (no index column).
    """
    videos = ET.parse(xml_path).getroot().findall("Video")
    rows = [
        {field.tag: (field.text or "") for field in video}
        for video in videos
    ]
    pd.DataFrame(rows).to_excel(xlsx_path, index=False)
# After generate_blackbox_metadata() finishes:
# Place these lines in the same scope where `base_dir` is defined (the
# `if __name__ == "__main__":` block of generate_blackbox_xml.py). The XML
# path below uses the default output name of generate_blackbox_metadata().
xml_out = os.path.join(base_dir, "BlackBox_Metadata.xml")
xlsx_out = os.path.join(base_dir, "BlackBox_Metadata.xlsx")
xml_to_xlsx(xml_out, xlsx_out)
print(f"Also wrote XLSX: {xlsx_out}")
That way you end up with both BlackBox_Metadata.xml and BlackBox_Metadata.xlsx.
In summary:
• Your “autometadata.py” (gist) creates one sidecar *.xml per video with pure OpenCV‐inferred fields.
• The above generate_blackbox_xml.py scans those sidecars, maps fields into BlackBox’s required tags, and spits out a single BlackBox_Metadata.xml ready for upload.
• Folder names or DEFAULT_BATCHNAME fill in the <BatchName> node; DEFAULT_CATEGORY fills <Category>.
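• If you later rename/move videos, delete the now‐orphaned sidecar *.xml files and re‐run both scripts in order. The XML will then be back in sync, because autometadata.py regenerates a sidecar for every *.mp4 and generate_blackbox_xml.py rebuilds BlackBox_Metadata.xml from whatever sidecars it finds.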
Feel free to tweak the “Configuration” block (top of generate_blackbox_xml.py) to match your exact dropdown values or editorial logic.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment