Last active
June 4, 2025 17:12
-
-
Save Cdaprod/ff00449ffd01d10808d731c2734206ae to your computer and use it in GitHub Desktop.
Goes with my xqd smb gist and this one is attempting to automate all metadata
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import cv2 | |
import xml.etree.ElementTree as ET | |
def get_video_technical_info(path):
    """Read basic stream properties of a video through OpenCV.

    Args:
        path: Path of the video file, passed to ``cv2.VideoCapture``.

    Returns:
        dict with the keys ``"fps"`` (value reported by OpenCV),
        ``"duration"`` (``frame_count / fps``, or 0 when fps is falsy),
        ``"width"`` and ``"height"`` (frame size as ints).
    """
    cap = cv2.VideoCapture(path)
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    finally:
        # Release even if a property conversion raises, so the capture
        # handle is never leaked.
        cap.release()

    # Guard against a zero fps (e.g. a file OpenCV could not open).
    duration = frame_count / fps if fps else 0
    return {
        "fps": fps,
        "duration": duration,
        "width": width,
        "height": height,
    }
def blur_score(path, sample_frame=10):
    """Return a sharpness score: the variance of the Laplacian of one frame.

    Up to ``sample_frame`` frames are read from the start of the video and
    the last frame that decoded successfully is scored. Clips shorter than
    ``sample_frame`` frames are therefore still scored on their final frame
    instead of being reported as 0.

    Args:
        path: Path of the video file, passed to ``cv2.VideoCapture``.
        sample_frame: Number of frames to read before scoring.

    Returns:
        Variance of the Laplacian of the grayscale frame, or 0 when no frame
        could be decoded at all.
    """
    cap = cv2.VideoCapture(path)
    frame = None
    try:
        for _ in range(sample_frame):
            ok, candidate = cap.read()
            # Keep the most recent good frame; a failed read (end of a short
            # clip) must not discard frames that were decoded earlier.
            if ok and candidate is not None:
                frame = candidate
    finally:
        cap.release()

    if frame is None:
        return 0
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    return cv2.Laplacian(gray, cv2.CV_64F).var()
def scene_brightness(path, sample_frame=10):
    """Return the mean HSV value (V channel) of one sampled frame.

    Up to ``sample_frame`` frames are read from the start of the video and
    the last frame that decoded successfully is measured. Clips shorter than
    ``sample_frame`` frames are therefore still measured on their final frame
    instead of being reported as 0.

    Args:
        path: Path of the video file, passed to ``cv2.VideoCapture``.
        sample_frame: Number of frames to read before measuring.

    Returns:
        Mean of the V channel after a BGR -> HSV conversion, or 0 when no
        frame could be decoded at all.
    """
    cap = cv2.VideoCapture(path)
    frame = None
    try:
        for _ in range(sample_frame):
            ok, candidate = cap.read()
            # Keep the most recent good frame; a failed read (end of a short
            # clip) must not discard frames that were decoded earlier.
            if ok and candidate is not None:
                frame = candidate
    finally:
        cap.release()

    if frame is None:
        return 0
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    # Index 2 of the last axis is the V (value) channel.
    return hsv[..., 2].mean()
def make_description(tech, blur, brightness):
    """Build a one-sentence description from the measured clip properties.

    Args:
        tech: Mapping with at least ``"fps"``, ``"width"`` and ``"height"``.
        blur: Sharpness score; above 100 is worded as "sharp focus".
        brightness: Brightness score; above 140 is "bright", below 60 is
            "dark", anything in between is "neutral lighting".

    Returns:
        The phrases joined by single spaces, capitalized, and terminated
        with " stock footage.".
    """
    # "slow motion" is only mentioned for frame rates above 45 fps.
    phrases = ["slow motion"] if tech["fps"] > 45 else []
    phrases.append("sharp focus" if blur > 100 else "soft focus")

    if brightness > 140:
        lighting = "bright"
    elif brightness < 60:
        lighting = "dark"
    else:
        lighting = "neutral lighting"
    phrases.append(lighting)

    phrases.append(f"{tech['width']}x{tech['height']} resolution")

    sentence = " ".join(phrases)
    return sentence.capitalize() + " stock footage."
def create_sidecar_xml(video_path, metadata):
    """Write ``metadata`` as a ``<video>`` sidecar XML next to the video.

    The sidecar path is ``video_path`` with its extension replaced by
    ``.xml``; an existing file at that path is overwritten.

    Args:
        video_path: Path of the video the sidecar belongs to.
        metadata: Mapping of tag name -> value; every value is stored as
            ``str(value)`` in a child element named after its key.
    """
    stem, _ext = os.path.splitext(video_path)
    xml_path = stem + ".xml"

    video_node = ET.Element("video")
    for tag, value in metadata.items():
        field = ET.SubElement(video_node, tag)
        field.text = str(value)

    document = ET.ElementTree(video_node)
    document.write(xml_path, encoding="utf-8", xml_declaration=True)
    print(f"Generated: {xml_path}")
def process_dir(directory):
    """Generate a sidecar XML for every ``.mp4`` file directly in ``directory``.

    Only the top level of ``directory`` is scanned (no recursion) and the
    extension match is case-insensitive. For each video the technical info,
    blur score and brightness are measured, a description and keyword list
    are derived from them, and the result is written with
    ``create_sidecar_xml``.

    Args:
        directory: Folder containing the video files.
    """
    for name in os.listdir(directory):
        if not name.lower().endswith(".mp4"):
            continue
        path = os.path.join(directory, name)

        # 1. Extract technical info
        tech = get_video_technical_info(path)
        blur = blur_score(path)
        brightness = scene_brightness(path)

        # 2. Generate description & keywords
        description = make_description(tech, blur, brightness)
        slowmo = tech["fps"] > 45
        resolution = f"{tech['width']}x{tech['height']}"

        # Same thresholds as make_description (>140 bright, <60 dark), so the
        # keyword and the description never disagree at the boundaries.
        if brightness > 140:
            light_keyword = "bright"
        elif brightness < 60:
            light_keyword = "dark"
        else:
            light_keyword = "neutral"

        keywords = [
            "stock",
            "footage",
            "slow motion" if slowmo else "normal speed",
            "sharp" if blur > 100 else "soft",
            light_keyword,
            resolution,
        ]

        # 3. Build metadata dictionary
        metadata = {
            "filename": os.path.basename(path),
            "absolute_path": os.path.abspath(path),
            "description": description,
            "keywords": ", ".join(keywords),
            "fps": tech["fps"],
            "duration": round(tech["duration"], 2),
            "resolution": resolution,
            "blur_score": round(blur, 2),
            "brightness": round(brightness, 2),
            "slow_motion": str(slowmo),
        }

        # 4. Write the sidecar XML (overwrites an existing one)
        create_sidecar_xml(path, metadata)
if __name__ == "__main__":
    # The script works on the current working directory, so change into the
    # folder that holds the .mp4 files before running it, e.g.:
    #   cd B:\Video\StockFootage\Batches\well_pump
    process_dir(os.getcwd())
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Below is a concrete, working example of how to take the per‐video sidecar XMLs (the output of your autometadata.py/gist) and fold them into a single BlackBox‐compatible metadata XML. In other words: | |
1. You already have, for each *.mp4, a sidecar *.xml containing fields like: | |
<video> | |
<filename>Z7V_1653.MP4</filename> | |
<absolute_path>B:\Video\StockFootage\Batches\well_pump\Z7V_1653.MP4</absolute_path> | |
<description>Slow motion sharp focus bright 1920x1080 resolution stock footage.</description>
<keywords>stock, footage, slow motion, sharp, bright, 1920x1080</keywords> | |
<fps>60.0</fps> | |
<duration>10.0</duration> | |
<resolution>1920x1080</resolution> | |
<blur_score>125.40</blur_score> | |
<brightness>150.21</brightness> | |
<slow_motion>True</slow_motion> | |
</video> | |
2. BlackBox expects a single XML file listing every clip’s metadata using this schema (one <Video> element per clip) with exactly these tags (matching their dropdown fields exactly): | |
<BlackBoxMetadata> | |
<Video> | |
<FileName>…</FileName> | |
<Description>…</Description> | |
<Keywords>…</Keywords> | |
<Category>…</Category> | |
<BatchName>…</BatchName> | |
<Editorial>…</Editorial> | |
<EditorialText>…</EditorialText> | |
<EditorialCity>…</EditorialCity> | |
<EditorialState>…</EditorialState> | |
<EditorialCountry>…</EditorialCountry> | |
<EditorialDate>…</EditorialDate> | |
<Title>…</Title> | |
<ShootingCountry>…</ShootingCountry> | |
<ShootingDate>…</ShootingDate> | |
</Video> | |
<!-- repeat one <Video> per clip --> | |
</BlackBoxMetadata> | |
Below is a Python script you can drop into the same folder (or a parent folder) that will: | |
• Scan your batch directory (and optionally subfolders) for sidecar XMLs (the ones autometadata.py generated). | |
• For each sidecar, read its fields, then map/transform into the exact BlackBox tags. | |
• Write out a single BlackBox_Metadata.xml in the root of that scan. | |
You will still manually review “Category” or “BatchName” values if you need to adjust them, but field‐mapping and the boilerplate is fully automated. | |
⸻ | |
1) Example folder structure | |
B:\Video\StockFootage\Batches\well_pump\ | |
├── Z7V_1653.MP4 | |
├── Z7V_1653.xml ← output from autometadata.py | |
├── Z7V_1654.MP4 | |
├── Z7V_1654.xml | |
└── generate_blackbox_xml.py ← (the script below) | |
⸻ | |
2) generate_blackbox_xml.py | |
import os | |
import glob | |
import xml.etree.ElementTree as ET | |
# ------------------------------------------------------------------------------
# CONFIGURATION: adjust these defaults as needed
# ------------------------------------------------------------------------------

# True: also pick up sidecar XMLs in subfolders. False: top-level folder only.
RECURSIVE_SCAN = False

# Category applied to every clip. It must exactly match one of BlackBox's
# dropdown categories (e.g. "Nature", "Business", etc.), so set a real
# category here rather than leaving it empty.
DEFAULT_CATEGORY = "Nature"

# BatchName applied to every clip. When blank (""), the name of the folder
# containing each sidecar XML is used instead.
DEFAULT_BATCHNAME = ""

# Editorial flag applied to every clip ("True" or "False"). Keep "False" when
# none of the clips are editorial; editorial clips need post-editing or a
# script extension.
DEFAULT_EDITORIAL = "False"

# Default ShootingCountry; may be left blank if unknown.
DEFAULT_SHOOTING_COUNTRY = "United States of America (USA)"

# Default ShootingDate (format: MM DD YYYY); may be left blank.
DEFAULT_SHOOTING_DATE = ""

# ------------------------------------------------------------------------------
# END CONFIGURATION
# ------------------------------------------------------------------------------
def find_sidecar_xmls(root_dir):
    """List the ``*.xml`` files under ``root_dir``.

    With ``RECURSIVE_SCAN`` enabled the whole tree below ``root_dir`` is
    searched; otherwise only ``root_dir`` itself. Every file ending in
    ``.xml`` is returned, whether or not it is actually a sidecar.

    Args:
        root_dir: Folder to scan.

    Returns:
        List of matching paths as produced by ``glob.glob``.
    """
    if RECURSIVE_SCAN:
        wildcard = "**/*.xml"
    else:
        wildcard = "*.xml"
    search = os.path.join(root_dir, wildcard)
    return glob.glob(search, recursive=RECURSIVE_SCAN)
def parse_sidecar_xml(xml_path):
    """Load one sidecar XML into a flat ``{tag: text}`` dictionary.

    Each direct child of the root element contributes one entry, e.g.
    ``'filename': "Z7V_1653.MP4"`` or ``'keywords': "stock, footage, ..."``.
    Children without text map to an empty string; if a tag occurs more than
    once, the last occurrence wins.

    Args:
        xml_path: Path of the sidecar ``.xml`` file.

    Returns:
        dict mapping child tag names to their text.
    """
    video_node = ET.parse(xml_path).getroot()
    return {field.tag: field.text or "" for field in video_node}
def make_blackbox_video_element(sidecar_data, folder_name):
    """Create one ``<Video>`` element carrying the 14 BlackBox tags.

    Args:
        sidecar_data: dict from ``parse_sidecar_xml()``; the keys
            ``"filename"``, ``"description"`` and ``"keywords"`` are used and
            default to "" when missing.
        folder_name: Name of the batch folder, used as ``<BatchName>`` when
            ``DEFAULT_BATCHNAME`` is blank.

    Returns:
        ``xml.etree.ElementTree.Element`` named ``Video`` whose children are
        FileName, Description, Keywords, Category, BatchName, Editorial,
        EditorialText, EditorialCity, EditorialState, EditorialCountry,
        EditorialDate, Title, ShootingCountry and ShootingDate, in that order.
    """
    # An empty DEFAULT_CATEGORY falls back to "Uncategorized" so <Category>
    # is never emitted blank.
    category = DEFAULT_CATEGORY.strip() or "Uncategorized"
    batch = DEFAULT_BATCHNAME.strip() or folder_name

    # (tag, text) pairs in the order the elements are emitted. The editorial
    # detail fields and Title are intentionally left blank.
    fields = (
        ("FileName", sidecar_data.get("filename", "")),
        ("Description", sidecar_data.get("description", "")),
        ("Keywords", sidecar_data.get("keywords", "")),
        ("Category", category),
        ("BatchName", batch),
        ("Editorial", DEFAULT_EDITORIAL),
        ("EditorialText", ""),
        ("EditorialCity", ""),
        ("EditorialState", ""),
        ("EditorialCountry", ""),
        ("EditorialDate", ""),
        ("Title", ""),
        ("ShootingCountry", DEFAULT_SHOOTING_COUNTRY),
        ("ShootingDate", DEFAULT_SHOOTING_DATE),
    )

    v = ET.Element("Video")
    for tag, text in fields:
        ET.SubElement(v, tag).text = text
    return v
def generate_blackbox_metadata(root_folder, output_filename="BlackBox_Metadata.xml"):
    """Combine all sidecar XMLs under ``root_folder`` into one BlackBox XML.

    The result has the shape::

        <BlackBoxMetadata>
          <Video>...</Video>
          <Video>...</Video>
        </BlackBoxMetadata>

    and is written to ``output_filename`` inside ``root_folder``. XML files
    that are not sidecars are skipped with a message: the output file of a
    previous run, files that fail to parse, and files without a
    ``<filename>`` entry.

    Args:
        root_folder: Folder to scan for sidecar XMLs.
        output_filename: Name of the combined file to write.
    """
    output_path = os.path.join(root_folder, output_filename)
    output_key = os.path.normcase(os.path.abspath(output_path))

    # The combined file from an earlier run also matches *.xml; without this
    # filter a re-run would ingest it as if it were a clip.
    sidecar_paths = [
        candidate
        for candidate in find_sidecar_xmls(root_folder)
        if os.path.normcase(os.path.abspath(candidate)) != output_key
    ]
    if not sidecar_paths:
        print(f"No sidecar XMLs found under {root_folder}.")
        return

    bb_root = ET.Element("BlackBoxMetadata")
    for xml_path in sorted(sidecar_paths):
        try:
            sidecar_data = parse_sidecar_xml(xml_path)
        except ET.ParseError as err:
            # One malformed file should not abort the whole batch.
            print(f"Skipping unreadable XML {xml_path}: {err}")
            continue

        if not sidecar_data.get("filename"):
            # Not a sidecar (no <filename>); it would only yield an empty
            # <Video> entry.
            print(f"Skipping {xml_path}: no <filename> entry.")
            continue

        # The immediate parent folder doubles as the fallback batch name.
        parent_folder = os.path.basename(os.path.dirname(xml_path))
        bb_root.append(make_blackbox_video_element(sidecar_data, parent_folder))

    if len(bb_root) == 0:
        print(f"No sidecar XMLs found under {root_folder}.")
        return

    tree = ET.ElementTree(bb_root)
    tree.write(output_path, encoding="utf-8", xml_declaration=True)
    print(f"BlackBox metadata file written to:\n {output_path}")
if __name__ == "__main__":
    # The current working directory is scanned, so run the script from the
    # batch folder (or its parent). Point `base_dir` somewhere else to scan a
    # specific path.
    base_dir = os.getcwd()
    generate_blackbox_metadata(root_folder=base_dir)
How it works: | |
1. Configuration block (top of file). | |
– RECURSIVE_SCAN: set True if you want to pick up sidecar XMLs in subfolders too. | |
– DEFAULT_CATEGORY: must exactly match one of BlackBox’s dropdowns (e.g. "Nature", "Business", etc.). | |
– DEFAULT_BATCHNAME: if blank, the script will use the name of the parent folder of each sidecar XML. | |
– DEFAULT_EDITORIAL: usually "False" for pure stock clips. | |
– DEFAULT_SHOOTING_COUNTRY & DEFAULT_SHOOTING_DATE: optional, can be blank or your default. | |
2. find_sidecar_xmls() | |
– Gathers all *.xml files in base_dir (or recursively if you enabled it). | |
3. parse_sidecar_xml(xml_path) | |
– Reads a single sidecar XML (the one your autometadata.py produced). | |
– Returns a dictionary like {'filename': 'Z7V_1653.MP4', 'description': '...', 'keywords': '...', ...}. | |
4. make_blackbox_video_element(sidecar_data, folder_name) | |
– Mints one <Video> element with exactly the 14 nodes that BlackBox requires. | |
– You see how each <Tag> maps to: | |
‣ <FileName> … </FileName> | |
‣ <Description> … </Description> | |
‣ <Keywords> … </Keywords> | |
‣ <Category> … </Category> | |
‣ <BatchName> … </BatchName> | |
‣ <Editorial> … </Editorial> | |
‣ <EditorialText> … </EditorialText> | |
‣ <EditorialCity> … </EditorialCity> | |
‣ <EditorialState> … </EditorialState> | |
‣ <EditorialCountry> … </EditorialCountry> | |
‣ <EditorialDate> … </EditorialDate> | |
‣ <Title> … </Title> | |
‣ <ShootingCountry> … </ShootingCountry> | |
‣ <ShootingDate> … </ShootingDate> | |
– Fields like <EditorialText> or <EditorialCity> are blank because DEFAULT_EDITORIAL="False". If you later need to mark certain clips as editorial, you can post‐edit these nodes or extend the script to read an “editorial” flag from your sidecar. | |
5. generate_blackbox_metadata() | |
– Loops over all sidecar XMLs, parses each into sidecar_data. | |
– Finds the parent folder name (used as BatchName if you didn’t set a default). | |
– Appends each <Video> to the single <BlackBoxMetadata> root. | |
– Writes out BlackBox_Metadata.xml in base_dir. | |
⸻ | |
🔧 How to Use | |
1. Install prerequisites (if you haven’t already): | |
pip install opencv-python | |
(Your autometadata.py variant should already have this.) | |
2. Drop the above generate_blackbox_xml.py into your batch folder. For example: | |
B:\Video\StockFootage\Batches\well_pump\generate_blackbox_xml.py | |
3. Open a terminal (PowerShell/CMD) and cd into that folder: | |
cd "B:\Video\StockFootage\Batches\well_pump" | |
4. Run: | |
python generate_blackbox_xml.py | |
You’ll see output like (the script does not list the individual sidecars it processed):
BlackBox metadata file written to:
 B:\Video\StockFootage\Batches\well_pump\BlackBox_Metadata.xml
5. Open B:\Video\StockFootage\Batches\well_pump\BlackBox_Metadata.xml in any text editor or browser to verify. It will look like: | |
<?xml version='1.0' encoding='utf-8'?> | |
<BlackBoxMetadata> | |
<Video> | |
<FileName>Z7V_1653.MP4</FileName> | |
<Description>Slow motion sharp focus bright 1920x1080 resolution stock footage.</Description>
<Keywords>stock, footage, slow motion, sharp, bright, 1920x1080</Keywords> | |
<Category>Nature</Category> | |
<BatchName>well_pump</BatchName> | |
<Editorial>False</Editorial> | |
<EditorialText></EditorialText> | |
<EditorialCity></EditorialCity> | |
<EditorialState></EditorialState> | |
<EditorialCountry></EditorialCountry> | |
<EditorialDate></EditorialDate> | |
<Title></Title> | |
<ShootingCountry>United States of America (USA)</ShootingCountry> | |
<ShootingDate></ShootingDate> | |
</Video> | |
<Video> | |
<FileName>Z7V_1654.MP4</FileName> | |
<Description>Soft focus neutral lighting 1920x1080 resolution stock footage.</Description>
<Keywords>stock, footage, normal speed, soft, neutral, 1920x1080</Keywords> | |
<Category>Nature</Category> | |
<BatchName>well_pump</BatchName> | |
<Editorial>False</Editorial> | |
<EditorialText></EditorialText> | |
<EditorialCity></EditorialCity> | |
<EditorialState></EditorialState> | |
<EditorialCountry></EditorialCountry> | |
<EditorialDate></EditorialDate> | |
<Title></Title> | |
<ShootingCountry>United States of America (USA)</ShootingCountry> | |
<ShootingDate></ShootingDate> | |
</Video> | |
<!-- etc. --> | |
</BlackBoxMetadata> | |
6. Submit (or copy/paste) that single BlackBox_Metadata.xml to BlackBox Global’s uploader. | |
– This satisfies their requirement for a metadata XML (the fields match their dropdowns exactly). | |
– If they still prefer .xlsx, you can easily convert this XML to XLSX (or use a similar Python‐Pandas approach), but often BlackBox will accept either. | |
⸻ | |
🚀 Extending Further | |
• Add Object/Scene Labels (YOLO/CLIP): In your autometadata.py, append detected labels into <keywords> so that generate_blackbox_xml.py picks them up automatically. | |
• Auto‐detect “Editorial”: If you have a logic (e.g. “if any face is detected → editorial=True”), write that into sidecar, and then let parse_sidecar_xml() pass it through. | |
• BatchName from Folder Structure: We already use the folder name as fallback. If you have multiple nested levels (e.g. …\farm\well_pump\), you could split os.path.dirname(xml_path) and pick the last two folder names, etc. | |
• XLSX Output: If you also need a .xlsx, add a Pandas conversion step after the final write (this needs pandas plus an Excel writer such as openpyxl: pip install pandas openpyxl):
import pandas as pd | |
def xml_to_xlsx(xml_path, xlsx_path):
    """Convert a combined BlackBox XML into a spreadsheet.

    Every ``<Video>`` child of the root becomes one row; its child tags are
    the column names and missing text is stored as an empty string.

    Args:
        xml_path: Path of the XML file to read.
        xlsx_path: Path of the spreadsheet to write.
    """
    videos = ET.parse(xml_path).getroot().findall("Video")
    rows = [
        {field.tag: (field.text or "") for field in video}
        for video in videos
    ]
    pd.DataFrame(rows).to_excel(xlsx_path, index=False)
# Run this once generate_blackbox_metadata() has written the XML; it reads
# the `base_dir` that the script's entry point sets.
xlsx_out = os.path.join(base_dir, "BlackBox_Metadata.xlsx")
xml_out = os.path.join(base_dir, "BlackBox_Metadata.xml")
xml_to_xlsx(xml_out, xlsx_out)
print(f"Also wrote XLSX: {xlsx_out}")
That way you end up with both BlackBox_Metadata.xml and BlackBox_Metadata.xlsx. | |
⸻ | |
In summary: | |
• Your “autometadata.py” (gist) creates one sidecar *.xml per video with pure OpenCV‐inferred fields. | |
• The above generate_blackbox_xml.py scans those sidecars, maps fields into BlackBox’s required tags, and spits out a single BlackBox_Metadata.xml ready for upload. | |
• Folder names or DEFAULT_BATCHNAME fill in the <BatchName> node; DEFAULT_CATEGORY fills <Category>. | |
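• If you later rename/move videos, delete the now-stale sidecar *.xml files and re‐run both scripts in order: autometadata.py writes a fresh sidecar for every *.mp4, and generate_blackbox_xml.py rebuilds the combined file from whatever sidecars are present (neither script removes old sidecars on its own).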
Feel free to tweak the “Configuration” block (top of generate_blackbox_xml.py) to match your exact dropdown values or editorial logic. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment