Skip to content

Instantly share code, notes, and snippets.

@philipprochazka
Created July 2, 2025 23:24
Show Gist options
  • Save philipprochazka/d778c7350831f3aebd9afe48b254246a to your computer and use it in GitHub Desktop.
Save philipprochazka/d778c7350831f3aebd9afe48b254246a to your computer and use it in GitHub Desktop.
Recursively scan a folder for SVG/CSS files and extract each file's metadata. Output a single batch_summary.extract.md with a table of contents (TOC) and collapsible per-file sections.
import sys
import os
from pathlib import Path
import xml.etree.ElementTree as ET
import re
# --- Hybrid extraction functions (from extractor_hybrid.py, refactored) ---
def extract_svg_doc_properties(svg_root):
    """Return the document-level sizing attributes of an SVG root element.

    Args:
        svg_root: Element whose ``attrib`` mapping is read.

    Returns:
        Dict with keys ``Width``, ``Height`` and ``ViewBox``. Missing width or
        height is reported as ``'unknown'``; a missing viewBox as ``'none'``.
    """
    attrs = svg_root.attrib
    return {
        'Width': attrs.get('width', 'unknown'),
        'Height': attrs.get('height', 'unknown'),
        'ViewBox': attrs.get('viewBox', 'none'),
    }
def extract_svg_elements(svg_root):
    """Collect styling attributes from every element under ``svg_root``.

    An element is reported only when it carries at least one of: ``fill``,
    ``stroke``, ``stroke-width``, ``font-family``, ``font-weight``,
    ``font-size`` or ``opacity``.

    Args:
        svg_root: Root element; it and all of its descendants are visited.

    Returns:
        List of dicts (document order) with keys ``id``, ``tag``, ``fill``,
        ``stroke``, ``stroke_width``, ``font_family``, ``font_weight``,
        ``font_size`` and ``other``. Absent attributes are ``None``; ``id``
        falls back to the tag name when the element has no ``id`` attribute.
    """
    collected = []
    for node in svg_root.iter():
        attrs = node.attrib
        # Drop the "{namespace}" prefix ElementTree puts on qualified tags.
        local_name = node.tag.split('}')[-1]
        paint = {key: attrs[key] for key in ('fill', 'stroke') if key in attrs}
        width = attrs.get('stroke-width')
        family = attrs.get('font-family')
        weight = attrs.get('font-weight')
        size = attrs.get('font-size')
        extras = {'opacity': attrs['opacity']} if 'opacity' in attrs else {}

        if not (paint or width or family or weight or size or extras):
            continue

        collected.append({
            'id': attrs.get('id', local_name),
            'tag': local_name,
            'fill': paint.get('fill'),
            'stroke': paint.get('stroke'),
            'stroke_width': width,
            'font_family': family,
            'font_weight': weight,
            'font_size': size,
            'other': extras,
        })
    return collected
def extract_colors_and_strokes(elements):
    """Summarise colors, stroke widths and font usage across element records.

    Args:
        elements: Iterable of dicts with the keys ``id``, ``fill``, ``stroke``,
            ``stroke_width``, ``font_family``, ``font_weight`` and
            ``font_size``.

    Returns:
        Tuple ``(colors, strokes, fonts)``: a set of every truthy fill/stroke
        value, a list of ``{'element', 'stroke_width'}`` dicts, and a list of
        ``{'element', 'font_family', 'font_weight', 'font_size'}`` dicts.
    """
    palette = set()
    stroke_rows = []
    font_rows = []
    for item in elements:
        palette.update(paint for paint in (item['fill'], item['stroke']) if paint)
        if item['stroke_width']:
            stroke_rows.append({
                'element': item['id'],
                'stroke_width': item['stroke_width'],
            })
        if any((item['font_family'], item['font_weight'], item['font_size'])):
            font_rows.append({
                'element': item['id'],
                'font_family': item['font_family'],
                'font_weight': item['font_weight'],
                'font_size': item['font_size'],
            })
    return palette, stroke_rows, font_rows
def extract_css_vars(css_text):
    """Extract CSS custom-property declarations (``--name: value``).

    Comments are removed first so commented-out declarations are ignored.
    A declaration may end at ``;``, at the closing ``}`` of its rule, or at
    the end of the text, so the last declaration of a block is found even
    without a trailing semicolon and a value never runs into the next rule.

    Args:
        css_text: Raw stylesheet text.

    Returns:
        List of ``(name, value)`` tuples in source order. ``name`` excludes the
        leading ``--``; ``value`` is stripped of surrounding whitespace.
    """
    without_comments = re.sub(r'/\*.*?\*/', '', css_text, flags=re.DOTALL)
    # The lookbehind rejects "--" inside identifiers such as BEM class names
    # (".block--modifier:hover"); the lookahead requires a real declaration end.
    pattern = r'(?<![\w-])--([\w-]+)\s*:\s*([^;{}]+)(?=[;}]|\Z)'
    return [
        (name, value.strip())
        for name, value in re.findall(pattern, without_comments)
    ]
# CSS named colors, lower-cased, used to filter keyword values.
_CSS_NAMED_COLORS = frozenset("""
aliceblue antiquewhite aqua aquamarine azure beige bisque black blanchedalmond
blue blueviolet brown burlywood cadetblue chartreuse chocolate coral
cornflowerblue cornsilk crimson cyan darkblue darkcyan darkgoldenrod darkgray
darkgreen darkgrey darkkhaki darkmagenta darkolivegreen darkorange darkorchid
darkred darksalmon darkseagreen darkslateblue darkslategray darkslategrey
darkturquoise darkviolet deeppink deepskyblue dimgray dimgrey dodgerblue
firebrick floralwhite forestgreen fuchsia gainsboro ghostwhite gold goldenrod
gray green greenyellow grey honeydew hotpink indianred indigo ivory khaki
lavender lavenderblush lawngreen lemonchiffon lightblue lightcoral lightcyan
lightgoldenrodyellow lightgray lightgreen lightgrey lightpink lightsalmon
lightseagreen lightskyblue lightslategray lightslategrey lightsteelblue
lightyellow lime limegreen linen magenta maroon mediumaquamarine mediumblue
mediumorchid mediumpurple mediumseagreen mediumslateblue mediumspringgreen
mediumturquoise mediumvioletred midnightblue mintcream mistyrose moccasin
navajowhite navy oldlace olive olivedrab orange orangered orchid palegoldenrod
palegreen paleturquoise palevioletred papayawhip peachpuff peru pink plum
powderblue purple rebeccapurple red rosybrown royalblue saddlebrown salmon
sandybrown seagreen seashell sienna silver skyblue slateblue slategray
slategrey snow springgreen steelblue tan teal thistle tomato turquoise violet
wheat white whitesmoke yellow yellowgreen
""".split())


def extract_colors_from_css(css_text):
    """Collect the color values that appear in a stylesheet.

    Recognises hex colors (3, 4, 6 or 8 digits), ``rgb()``/``rgba()``,
    ``hsl()``/``hsla()`` and CSS named colors used as a whole declaration
    value. Other keyword values (``block``, ``none``, ``bold``) are not
    reported.

    Args:
        css_text: Raw stylesheet text.

    Returns:
        Set of color strings exactly as written in the source.
    """
    colors = set()
    # Longest alternatives first so "#aabbccdd" is not cut to "#aabbcc"; the
    # lookahead rejects longer identifiers such as "#add-item".
    # NOTE: an id selector made only of hex digits (e.g. "#fade") still matches.
    colors.update(re.findall(
        r'#(?:[0-9a-fA-F]{8}|[0-9a-fA-F]{6}|[0-9a-fA-F]{3,4})(?![\w-])',
        css_text,
    ))
    colors.update(re.findall(r'rgba?\([^)]+\)', css_text))
    colors.update(re.findall(r'hsla?\([^)]+\)', css_text))
    # A bare keyword value counts as a color only if it is a real CSS color.
    keyword_values = re.findall(
        r':\s*([a-zA-Z]+)\s*(?:!\s*important\s*)?[;}]', css_text
    )
    colors.update(
        word for word in keyword_values if word.lower() in _CSS_NAMED_COLORS
    )
    return colors
def extract_fonts_from_css(css_text):
    """Extract font-family / font-weight / font-size declarations per rule.

    Comments are removed first. Each innermost ``selector { ... }`` rule is
    inspected, so rules nested in an at-rule such as ``@media`` are reported
    under their own selector. The full selector text is kept (hyphenated
    names, selector lists, pseudo-classes), and a declaration is found even
    when it is the last one in the block and has no trailing semicolon.

    Args:
        css_text: Raw stylesheet text.

    Returns:
        List of dicts with keys ``element`` (the selector, whitespace
        collapsed), ``font_family``, ``font_weight`` and ``font_size``; a
        property the rule does not declare is ``None``. Rules declaring none
        of the three properties are omitted.
    """
    def declared(prop, block):
        # The lookbehind keeps "--my-font-size" from matching "font-size".
        found = re.search(r'(?<![\w-])' + prop + r'\s*:\s*([^;]+)', block)
        return found.group(1).strip() if found else None

    without_comments = re.sub(r'/\*.*?\*/', '', css_text, flags=re.DOTALL)
    font_blocks = []
    # "[^{}]*" restricts the body to innermost blocks (no nested braces).
    for rule in re.finditer(r'([^{};]+?)\s*\{([^{}]*)\}', without_comments):
        selector = ' '.join(rule.group(1).split())
        block = rule.group(2)
        family = declared('font-family', block)
        weight = declared('font-weight', block)
        size = declared('font-size', block)
        if family is None and weight is None and size is None:
            continue
        font_blocks.append({
            'element': selector,
            'font_family': family,
            'font_weight': weight,
            'font_size': size,
        })
    return font_blocks
def make_swatch(color):
    """Return a Markdown image showing ``color`` as a shields.io badge.

    The badge URL embeds the value as a hex color, so a swatch is produced
    only for 3- or 6-digit hex colors (with or without a leading ``#``).
    Anything else (``None``, ``''``, ``'none'``, ``rgb(...)``, named colors,
    ``var(...)``) yields an empty string rather than a broken image link.

    Args:
        color: Color string, or ``None``.

    Returns:
        Markdown image markup, or ``""`` when no swatch can be rendered.
    """
    if not color:
        return ""
    digits = color.strip().lstrip('#')
    is_hex = len(digits) in (3, 6) and set(digits) <= set('0123456789abcdefABCDEF')
    if not is_hex:
        return ""
    return f"![](https://img.shields.io/badge/-%20-%23{digits}?style=flat-square&labelColor={digits})"
def markdown_escape(text):
    """Backslash-escape ``_`` and ``*`` in ``text``.

    Falsy input (``None`` or ``''``) is returned unchanged.
    """
    if not text:
        return text
    escaped = text
    for special in ('_', '*'):
        escaped = escaped.replace(special, '\\' + special)
    return escaped
# --- Batch logic and master summary output ---
def find_svg_css_files(folder):
    """Recursively list the ``.svg`` and ``.css`` files under ``folder``.

    The extension check is case-insensitive. Paths are returned in the order
    ``os.walk`` yields them, each joined onto its containing directory.
    """
    wanted_suffixes = ('.svg', '.css')
    return [
        os.path.join(dirpath, name)
        for dirpath, _subdirs, names in os.walk(folder)
        for name in names
        if name.lower().endswith(wanted_suffixes)
    ]
def extract_metadata_for_file(file_path):
    """Build the Markdown report section for one SVG or CSS file.

    SVG files are parsed with ElementTree and summarised via the
    ``extract_svg_*`` helpers; CSS files are read as UTF-8 text and summarised
    via the ``extract_*_css*`` helpers. The section has a heading and anchor,
    then document properties, color palette, stroke widths, font usage,
    per-element details and CSS variables (sections without data are omitted,
    except the properties and palette tables).

    Args:
        file_path: Path to the file. Its path relative to the current working
            directory is used for the heading and anchor.

    Returns:
        The Markdown section as a string; ``None`` when the extension is
        neither ``.svg`` nor ``.css``; or a one-line ``> Error extracting ...``
        blockquote when reading or parsing the file fails.
    """
    ext = Path(file_path).suffix.lower()
    rel_path = os.path.relpath(file_path)
    doc_props = {}
    colors = set()
    strokes = []
    fonts = []
    elements = []
    css_vars = []
    css_colors = set()
    css_fonts = []
    try:
        if ext == '.svg':
            svg_root = ET.parse(str(file_path)).getroot()
            doc_props = extract_svg_doc_properties(svg_root)
            elements = extract_svg_elements(svg_root)
            colors, strokes, fonts = extract_colors_and_strokes(elements)
        elif ext == '.css':
            text = Path(file_path).read_text(encoding='utf-8')
            css_vars = extract_css_vars(text)
            css_colors = extract_colors_from_css(text)
            css_fonts = extract_fonts_from_css(text)
            doc_props = {'File': rel_path}
        else:
            return None

        # --- Hybrid markdown for ONE file ---
        anchor = rel_path.replace('/', '-').replace('.', '-')
        file_md = [f"## {rel_path}\n<a name=\"{anchor}\"></a>"]
        file_md.append("\n### 📐 Document Properties\n")
        file_md.append("| Property | Value |")
        file_md.append("|-------------|----------------|")
        for key, value in doc_props.items():
            file_md.append(f"| {key} | {value} |")
        file_md.append("")

        # Color Palette: SVG paints, CSS colors and every CSS variable value.
        var_values = {value.strip() for _, value in css_vars}
        all_colors = set(colors) | set(css_colors) | var_values
        file_md.append("### 🎨 Color Palette\n")
        file_md.append("| Color Code | Swatch | Found In |")
        file_md.append("|------------|--------|--------------|")
        for color in sorted(all_colors):
            found = []
            if color in colors:
                found.append("SVG")
            if color in css_colors:
                found.append("CSS")
            if color in var_values:
                found.append("css variable")
            file_md.append(f"| `{color}` | {make_swatch(color)} | {', '.join(found)} |")
        file_md.append("")

        # Stroke Widths
        if strokes:
            file_md.append("### ✏️ Stroke Widths\n")
            file_md.append("| Element | Stroke Width |")
            file_md.append("|-----------------|-------------|")
            for stroke in strokes:
                file_md.append(f"| `{stroke['element']}` | {stroke['stroke_width']} |")
            file_md.append("")

        # Font Usage
        font_rows = fonts + css_fonts
        if font_rows:
            file_md.append("### 🅰️ Font Usage\n")
            file_md.append("| Element/Class | Font Family | Font Weight | Font Size |")
            file_md.append("|-----------------|-------------|-------------|-----------|")
            for row in font_rows:
                # The keys exist with value None when a property is absent, so
                # a dict.get default never applies; "or ''" keeps the literal
                # text "None" out of the table.
                family = markdown_escape(row.get('font_family') or '')
                weight = markdown_escape(row.get('font_weight') or '')
                size = markdown_escape(row.get('font_size') or '')
                file_md.append(f"| `{row['element']}` | {family} | {weight} | {size} |")
            file_md.append("")

        # Per-Element Details
        if elements:
            file_md.append("### 📝 Per-Element Details\n")
            for element in elements:
                file_md.append(
                    f"<details>\n<summary><strong>{markdown_escape(element['id'])}</strong></summary>"
                )
                # Blank lines around the bullets: Markdown inside an HTML block
                # is only rendered once a blank line ends the raw-HTML run.
                file_md.append("")
                if element['fill']:
                    file_md.append(f"- **Fill:** `{element['fill']}`")
                if element['stroke']:
                    file_md.append(f"- **Stroke:** `{element['stroke']}`")
                if element['stroke_width']:
                    file_md.append(f"- **Stroke Width:** `{element['stroke_width']}`")
                font_parts = [
                    part
                    for part in (
                        element['font_family'],
                        element['font_weight'],
                        element['font_size'],
                    )
                    if part
                ]
                if font_parts:
                    # Join only the parts that are present; absent parts are
                    # None and would otherwise print as "None".
                    file_md.append(f"- **Font:** _{' '.join(font_parts)}_")
                for other_key, other_value in element['other'].items():
                    file_md.append(f"- **Other Attributes:** {other_key}: {other_value}")
                file_md.append("")
                file_md.append("</details>")
            file_md.append("")

        # CSS Variables
        if css_vars:
            file_md.append("### 🏷️ CSS Variables\n")
            file_md.append("| Variable Name | Value (Color) |")
            file_md.append("|------------------|--------------|")
            for varname, value in css_vars:
                file_md.append(f"| `{varname}` | `{value.strip()}` |")
            file_md.append("\n#### Color Variable = Value List")
            for varname, value in css_vars:
                file_md.append(f"`{varname} = {value.strip()}` ")
            file_md.append("")
        return "\n".join(file_md)
    except Exception as exc:
        # Deliberate per-file boundary: one unreadable or malformed file
        # becomes an error note in the report instead of aborting the batch.
        return f"> Error extracting `{rel_path}`: {exc}"
def main(folder):
    """Scan ``folder`` and write ``batch_summary.extract.md`` inside it.

    The summary has a title, a table of contents linking to each file's
    section, and one section per SVG/CSS file, each preceded by a horizontal
    rule. Progress is printed to stdout.

    Args:
        folder: Directory to scan recursively; also where the summary is
            written.
    """
    folder = Path(folder)
    # Sorted so the report order does not depend on filesystem walk order.
    files = sorted(find_svg_css_files(folder))
    master_md = []
    master_md.append(f"# 📦 Batch Extract Summary\n\nExtracted design metadata for all SVG/CSS assets in `{folder}`.\n")
    master_md.append("## Table of Contents\n")
    for f in files:
        label = os.path.relpath(f, folder)
        # Section anchors are derived from the path relative to the current
        # working directory (see extract_metadata_for_file), so the link
        # target must be built the same way or the TOC links go nowhere.
        anchor = os.path.relpath(f).replace('/', '-').replace('.', '-')
        master_md.append(f"- [{label}](#{anchor})")
    for f in files:
        print(f"Extracting: {f}")
        section_md = extract_metadata_for_file(f)
        if section_md:
            # One rule per section; the first also separates the TOC from it.
            master_md.append("\n---\n")
            master_md.append(section_md)
    out_path = folder / "batch_summary.extract.md"
    out_path.write_text('\n'.join(master_md), encoding='utf-8')
    print(f"Batch summary written to: {out_path}")
# Script entry point: the first command-line argument is the folder to scan.
if __name__ == '__main__':
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python extractor_root_metadata.py <folder>")
        sys.exit(1)
    main(cli_args[0])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment