Created
July 2, 2025 23:24
-
-
Save philipprochazka/d778c7350831f3aebd9afe48b254246a to your computer and use it in GitHub Desktop.
Recursively scan a folder for SVG/CSS files. For each file, extract its metadata. Output a single batch_summary.extract.md with a TOC (Table of Contents) and collapsible per-file sections.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import os | |
from pathlib import Path | |
import xml.etree.ElementTree as ET | |
import re | |
# --- Hybrid extraction functions (from extractor_hybrid.py, refactored) ---
def extract_svg_doc_properties(svg_root):
    """Read the document-level sizing attributes of an SVG root element.

    Args:
        svg_root: Element whose ``attrib`` mapping is inspected.

    Returns:
        A dict with the keys 'Width', 'Height' and 'ViewBox'. A missing
        width or height is reported as 'unknown'; a missing viewBox as
        'none'.
    """
    attrs = svg_root.attrib
    return {
        'Width': attrs.get('width', 'unknown'),
        'Height': attrs.get('height', 'unknown'),
        'ViewBox': attrs.get('viewBox', 'none'),
    }
def extract_svg_elements(svg_root):
    """Collect the styling attributes of every element under ``svg_root``.

    The root itself and all descendants are visited. An element is kept
    only if it carries at least one of: fill, stroke, stroke-width,
    font-family, font-weight, font-size or opacity.

    Returns:
        A list of dicts with the keys 'id', 'tag', 'fill', 'stroke',
        'stroke_width', 'font_family', 'font_weight', 'font_size' and
        'other'. Absent attributes are ``None``; 'other' holds the opacity
        when present. 'id' falls back to the tag name when the element has
        no id attribute.
    """
    collected = []
    for node in svg_root.iter():
        attrs = node.attrib
        # Drop the '{namespace}' prefix ElementTree puts on qualified tags.
        local_tag = node.tag.split('}')[-1]
        identifier = attrs.get('id', local_tag)

        paint = {key: attrs[key] for key in ('fill', 'stroke') if key in attrs}
        stroke_width = attrs.get('stroke-width')
        font_family = attrs.get('font-family')
        font_weight = attrs.get('font-weight')
        font_size = attrs.get('font-size')
        extras = {'opacity': attrs['opacity']} if 'opacity' in attrs else {}

        has_styling = (
            paint or stroke_width or font_family
            or font_weight or font_size or extras
        )
        if not has_styling:
            continue

        collected.append({
            'id': identifier,
            'tag': local_tag,
            'fill': paint.get('fill'),
            'stroke': paint.get('stroke'),
            'stroke_width': stroke_width,
            'font_family': font_family,
            'font_weight': font_weight,
            'font_size': font_size,
            'other': extras,
        })
    return collected
def extract_colors_and_strokes(elements):
    """Summarise element records into colours, stroke widths and fonts.

    Args:
        elements: Iterable of dicts providing the keys 'id', 'fill',
            'stroke', 'stroke_width', 'font_family', 'font_weight' and
            'font_size'.

    Returns:
        A ``(colors, strokes, fonts)`` tuple: the set of truthy fill and
        stroke values, a list of ``{'element', 'stroke_width'}`` rows, and
        a list of ``{'element', 'font_family', 'font_weight', 'font_size'}``
        rows for elements with at least one truthy font field.
    """
    palette = set()
    stroke_rows = []
    font_rows = []
    for item in elements:
        palette.update(
            value for value in (item['fill'], item['stroke']) if value
        )
        width = item['stroke_width']
        if width:
            stroke_rows.append({'element': item['id'], 'stroke_width': width})
        family = item['font_family']
        weight = item['font_weight']
        size = item['font_size']
        if any((family, weight, size)):
            font_rows.append({
                'element': item['id'],
                'font_family': family,
                'font_weight': weight,
                'font_size': size,
            })
    return palette, stroke_rows, font_rows
def extract_css_vars(css_text):
    """Find CSS custom-property declarations (``--name: value``).

    The value stops at the first ``;`` or at the closing ``}`` of the rule,
    so a final declaration written without a trailing semicolon is still
    captured and never swallows the following rule. Because a value may not
    contain ``{``, selectors that merely contain ``--`` (for example the
    BEM form ``.btn--big:hover { ... }``) are not mistaken for variables.

    Args:
        css_text: Raw stylesheet text.

    Returns:
        A list of ``(name, value)`` tuples in source order. ``name`` omits
        the leading ``--``; ``value`` is not stripped of trailing
        whitespace.
    """
    return re.findall(r'--([\w-]+)\s*:\s*([^;{}]+)(?:;|(?=\}))', css_text)
def extract_colors_from_css(css_text):
    """Collect the colour values that appear in a stylesheet.

    Recognised forms:

    * hex colours with 3, 4, 6 or 8 digits (matched whole, so an
      ``#RRGGBBAA`` value is not truncated to ``#RRGGBB``);
    * ``rgb()/rgba()`` and ``hsl()/hsla()`` functional notation;
    * single-keyword values, but only on colour-bearing properties (any
      property whose name ends in ``color``, plus ``background``, ``fill``
      and ``stroke``). This keeps values such as ``block`` or ``bold`` out
      of the palette. CSS-wide keywords and ``none`` are skipped as well.

    Args:
        css_text: Raw stylesheet text.

    Returns:
        A set of colour strings exactly as written in the source.
    """
    paint_properties = ('background', 'fill', 'stroke')
    non_color_keywords = ('inherit', 'initial', 'unset', 'revert', 'none')

    colors = set()
    # Hex: longest alternatives first; \b rejects partial matches.
    colors.update(re.findall(
        r'#(?:[0-9a-fA-F]{8}|[0-9a-fA-F]{6}|[0-9a-fA-F]{3,4})\b', css_text))
    # rgb or rgba
    colors.update(re.findall(r'rgb[a]?\([^\)]+\)', css_text))
    # hsl or hsla
    colors.update(re.findall(r'hsl[a]?\([^\)]+\)', css_text))
    # Keyword colours: "<property>: <word>" ended by ';', '}' or end of text.
    declaration = re.compile(
        r'(?<![\w-])([a-zA-Z-]+)\s*:\s*([a-zA-Z]+)\s*(?=[;}]|$)')
    for match in declaration.finditer(css_text):
        prop = match.group(1).lower()
        value = match.group(2)
        if not (prop.endswith('color') or prop in paint_properties):
            continue
        if value.lower() in non_color_keywords:
            continue
        colors.add(value)
    return colors
def extract_fonts_from_css(css_text):
    """Extract font-family / font-weight / font-size per CSS rule.

    Only innermost ``selector { declarations }`` rules are inspected (the
    declaration body may not contain braces), so rules nested inside an
    at-rule such as ``@media`` are reported under their own selector.
    The complete selector is kept with its whitespace collapsed, and a
    final declaration without a trailing ``;`` is still recognised.

    Args:
        css_text: Raw stylesheet text.

    Returns:
        A list of dicts with the keys 'element' (the selector),
        'font_family', 'font_weight' and 'font_size'; properties the rule
        does not set are ``None``. Rules setting none of the three are
        omitted.
    """
    properties = (
        ('font_family', 'font-family'),
        ('font_weight', 'font-weight'),
        ('font_size', 'font-size'),
    )
    font_blocks = []
    for rule in re.finditer(r'([^{};]+?)\s*\{([^{}]*)\}', css_text):
        # Collapse newlines/indentation so the selector fits one table cell.
        selector = ' '.join(rule.group(1).split())
        if not selector:
            continue
        body = rule.group(2)
        row = {'element': selector}
        matched_any = False
        for key, css_name in properties:
            # The lookbehind keeps e.g. "--font-size" from matching.
            hit = re.search(
                r'(?<![\w-])' + css_name + r'\s*:\s*([^;]+)', body)
            row[key] = hit.group(1).strip() if hit else None
            matched_any = matched_any or hit is not None
        if matched_any:
            font_blocks.append(row)
    return font_blocks
def make_swatch(color):
    """Return the swatch markup for a colour table cell.

    The leading '#' characters are stripped from ``color`` first. As
    written, every path yields an empty string.
    """
    bare = color.lstrip('#')
    if bare and bare != 'none':
        # NOTE(review): this branch returns an empty string too; the swatch
        # markup (presumably an image/HTML snippet built from the colour)
        # appears to have been lost - confirm against the original script.
        return ""
    return ""
def markdown_escape(text):
    """Backslash-escape '_' and '*' in ``text``.

    Falsy input (``None`` or an empty string) is returned unchanged.
    """
    if not text:
        return text
    escaped = text.replace('_', '\\_')
    return escaped.replace('*', '\\*')
# --- Batch logic and master summary output ---
def find_svg_css_files(folder):
    """Recursively list the .svg and .css files under ``folder``.

    Directories and file names are visited in sorted order so the result
    does not depend on the order in which the filesystem happens to list
    directory entries.

    Args:
        folder: Directory to walk (str or path-like).

    Returns:
        A list of paths (``root`` joined with the file name) whose extension
        is .svg or .css, compared case-insensitively. Files directly inside
        a directory come before the files of its subdirectories.
    """
    matches = []
    for root, dirnames, filenames in os.walk(folder):
        # Sorting in place makes os.walk descend in a deterministic order.
        dirnames.sort()
        matches.extend(
            os.path.join(root, fn)
            for fn in sorted(filenames)
            if fn.lower().endswith(('.svg', '.css'))
        )
    return matches
def extract_metadata_for_file(file_path):
    """Render the markdown report section for one SVG or CSS file.

    SVG files are parsed with ElementTree and summarised through the
    extract_svg_* helpers; CSS files are read as UTF-8 text and scanned by
    the extract_*_css helpers. The section contains document properties, a
    colour palette, and - when data exists - stroke widths, font usage,
    collapsible per-element details and CSS variables.

    Args:
        file_path: Path (str or path-like) of the file to inspect.

    Returns:
        The markdown section as one string; ``None`` when the extension is
        neither .svg nor .css; or a one-line blockquote describing the
        error when extraction raised.
    """
    ext = Path(file_path).suffix.lower()
    # Headings and anchors use the path relative to the current directory.
    rel_path = os.path.relpath(file_path)
    doc_props = {}
    colors = set()
    strokes = []
    fonts = []
    elements = []
    css_vars = []
    css_colors = set()
    css_fonts = []
    file_md = []
    # NOTE(review): the glyphs in front of the section headings below look
    # like mis-encoded emoji; they are kept exactly as found.
    try:
        if ext == '.svg':
            tree = ET.parse(str(file_path))
            svg_root = tree.getroot()
            doc_props = extract_svg_doc_properties(svg_root)
            elements = extract_svg_elements(svg_root)
            colors, strokes, fonts = extract_colors_and_strokes(elements)
        elif ext == '.css':
            text = Path(file_path).read_text(encoding='utf-8')
            css_vars = extract_css_vars(text)
            css_colors = extract_colors_from_css(text)
            css_fonts = extract_fonts_from_css(text)
            doc_props = {'File': rel_path}
        else:
            return None

        # --- Hybrid markdown for ONE file ---
        anchor = rel_path.replace('/', '-').replace('.', '-')
        file_md.append(f"## {rel_path}\n<a name=\"{anchor}\"></a>")
        file_md.append("\n### π Document Properties\n")
        file_md.append("| Property | Value |")
        file_md.append("|-------------|----------------|")
        for k, v in doc_props.items():
            file_md.append(f"| {k} | {v} |")
        file_md.append("")

        # Color Palette: SVG colours, CSS colours and every CSS variable value.
        var_values = {value.strip() for _, value in css_vars}
        all_colors = set(colors) | set(css_colors) | var_values
        file_md.append("### π¨ Color Palette\n")
        file_md.append("| Color Code | Swatch | Found In |")
        file_md.append("|------------|--------|--------------|")
        for color in sorted(all_colors):
            color_disp = f"`{color}`"
            swatch = make_swatch(color)
            found = []
            if color in colors:
                found.append("SVG")
            if color in css_colors:
                found.append("CSS")
            if color in var_values:
                found.append("css variable")
            file_md.append(f"| {color_disp} | {swatch} | {', '.join(found)} |")
        file_md.append("")

        # Stroke Widths
        if strokes:
            file_md.append("### βοΈ Stroke Widths\n")
            file_md.append("| Element | Stroke Width |")
            file_md.append("|-----------------|-------------|")
            for s in strokes:
                file_md.append(f"| `{s['element']}` | {s['stroke_width']} |")
            file_md.append("")

        # Font Usage
        font_rows = fonts + css_fonts
        if font_rows:
            file_md.append("### π °οΈ Font Usage\n")
            file_md.append("| Element/Class | Font Family | Font Weight | Font Size |")
            file_md.append("|-----------------|-------------|-------------|-----------|")
            for f in font_rows:
                # The keys exist with value None when a property is unset,
                # so a .get() default would not prevent a literal "None".
                family = markdown_escape(f.get('font_family') or '')
                weight = markdown_escape(f.get('font_weight') or '')
                size = markdown_escape(f.get('font_size') or '')
                file_md.append(f"| `{f['element']}` | {family} | {weight} | {size} |")
            file_md.append("")

        # Per-Element Details
        if elements:
            file_md.append("### π Per-Element Details\n")
            for e in elements:
                file_md.append(f"<details>\n<summary><strong>{markdown_escape(e['id'])}</strong></summary>")
                if e['fill']:
                    file_md.append(f"- **Fill:** `{e['fill']}`")
                if e['stroke']:
                    file_md.append(f"- **Stroke:** `{e['stroke']}`")
                if e['stroke_width']:
                    file_md.append(f"- **Stroke Width:** `{e['stroke_width']}`")
                # Describe the font from the fields that are actually set.
                font_parts = [
                    part for part in
                    (e['font_family'], e['font_weight'], e['font_size'])
                    if part
                ]
                if font_parts:
                    font_desc = " ".join(font_parts).strip()
                    file_md.append(f"- **Font:** _{font_desc}_")
                if e['other']:
                    for ok, ov in e['other'].items():
                        file_md.append(f"- **Other Attributes:** {ok}: {ov}")
                file_md.append("</details>")
            file_md.append("")

        # CSS Variables
        if css_vars:
            file_md.append("### π·οΈ CSS Variables\n")
            file_md.append("| Variable Name | Value (Color) |")
            file_md.append("|------------------|--------------|")
            for varname, value in css_vars:
                file_md.append(f"| `{varname}` | `{value.strip()}` |")
            file_md.append("\n#### Color Variable = Value List")
            for varname, value in css_vars:
                file_md.append(f"`{varname} = {value.strip()}` ")
            file_md.append("")
        return "\n".join(file_md)
    except Exception as e:
        # Best-effort batch run: report the failure in the summary instead
        # of aborting the remaining files.
        return f"> Error extracting `{rel_path}`: {e}"
def main(folder):
    """Scan ``folder`` and write ``batch_summary.extract.md`` inside it.

    The summary starts with a table of contents linking to one section per
    SVG/CSS file found by ``find_svg_css_files``; each section is produced
    by ``extract_metadata_for_file``. Progress is printed to stdout.

    Args:
        folder: Directory to scan (str or path-like). The summary file is
            written into this directory.
    """
    folder = Path(folder)
    files = find_svg_css_files(folder)
    master_md = []
    master_md.append(f"# π¦ Batch Extract Summary\n\nExtracted design metadata for all SVG/CSS assets in `{folder}`.\n")
    master_md.append("## Table of Contents\n")
    for f in files:
        rel = os.path.relpath(f, folder)
        # extract_metadata_for_file derives each section's anchor from the
        # path relative to the current directory, so the link target must
        # be built from that same path or the TOC links break whenever the
        # script is run from outside `folder`.
        anchor = os.path.relpath(f).replace('/', '-').replace('.', '-')
        master_md.append(f"- [{rel}](#{anchor})")
    master_md.append("\n---\n")
    for f in files:
        print(f"Extracting: {f}")
        section_md = extract_metadata_for_file(f)
        if section_md:
            master_md.append("\n---\n")
            master_md.append(section_md)
    out_path = folder / "batch_summary.extract.md"
    out_path.write_text('\n'.join(master_md), encoding='utf-8')
    print(f"Batch summary written to: {out_path}")
# Script entry point: expects the folder to scan as the first CLI argument.
if __name__ == '__main__':
    if len(sys.argv) >= 2:
        main(sys.argv[1])
    else:
        print("Usage: python extractor_root_metadata.py <folder>")
        sys.exit(1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment