Skip to content

Instantly share code, notes, and snippets.

@MadhukarMoogala
Created September 9, 2025 11:36
Show Gist options
  • Save MadhukarMoogala/ad75c317f720844a5ee172f54a4a1545 to your computer and use it in GitHub Desktop.
Save MadhukarMoogala/ad75c317f720844a5ee172f54a4a1545 to your computer and use it in GitHub Desktop.
This tool extracts scale information from Bluebeam-calibrated PDF files. It identifies scale ruler values, drawing scale annotations, and slope annotations embedded in the PDF metadata.
#!/usr/bin/env python3
"""
extract_pdf_scales.py
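This tool extracts scale information from Bluebeam-calibrated PDF files. It does not read the PDF directly: it reads the JSON dump that qpdf produces for the PDF (see Step 1 below) and identifies scale ruler values, drawing scale annotations, and slope annotations stored in the PDF's objects.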
qpdf: https://github.com/qpdf/qpdf
Usage:
python extract_pdf_scales.py file.json
Author: Madhukar Moogala
## Prerequisites
- Python 3.x installed
- qpdf command-line tool installed
- Bluebeam-calibrated PDF file
## Usage Instructions
### Step 1: Convert PDF to JSON
Generate JSON data from your PDF using qpdf:
```bash
qpdf --json --json-stream-data=inline your_file.pdf > file.json
```
### Step 2: Run the Python Script
Extract scale information:
```bash
python extract_pdf_scales.py file.json
```
## Output
The script will display:
1. **Scale Ruler Values** - All measurement scales available in Bluebeam (e.g., 1/4" = 1', 1:48 ratio)
2. **Drawing Scale Annotations** - Actual scales used on specific drawings
3. **Slope Annotations** - Roof slopes and construction details (e.g., SLOPE 4:12)
4. **Summary** - Total counts and PDF information
## Example Output
```
============================================================
PDF SCALE EXTRACTION RESULTS
============================================================
Total Pages: 15
1. SCALE RULER VALUES (15 found):
----------------------------------------
1. 0.03125" = 1' (1:384 ) - obj:2863 0 R
2. 0.0625" = 1' (1:192 ) - obj:2865 0 R
3. 0.25" = 1' (1:48 ) - obj:2861 0 R
...
2. DRAWING SCALE ANNOTATIONS (9 found):
----------------------------------------
SCALE: 1/2"=1'-0"
(Found in 9 locations)
3. SLOPE ANNOTATIONS (4 found):
----------------------------------------
SLOPE 4:12
(Found in 4 locations)
```
"""
import json
import sys
import re
from collections import defaultdict
# Fail fast with a usage hint when the JSON path argument is missing.
# The __name__ guard keeps this check from running on import: without it,
# importing this module from another program or a test runner that was
# started with no extra arguments would terminate the importing process.
if __name__ == "__main__" and len(sys.argv) < 2:
    print("Usage: python extract_pdf_scales.py file.json")
    sys.exit(1)
# Matches Bluebeam ruler strings such as "0.25 in = 1 ft"; group 1 is the
# number of inches on paper that represents one foot.
_RULER_PATTERN = re.compile(r"(\d*\.?\d+)\s*in\s*=\s*1\s*ft")


def _strip_string_prefix(text):
    """Return *text* without a leading ``u:`` marker.

    qpdf's JSON output marks Unicode strings with a leading ``u:``. Only that
    leading marker is removed; any later "u:" inside the text is real content
    and is kept.
    """
    return text[2:] if text.startswith("u:") else text


def _parse_scale_ruler(r_field, key):
    """Build a ruler record from an ``/R`` value, or return None.

    Args:
        r_field: The object's ``/R`` entry (any JSON type).
        key: The qpdf object key, stored in the record for display.

    Returns:
        A dict with ``inches``, ``scale``, ``ratio`` and ``object`` keys, or
        None when *r_field* is not an "<n> in = 1 ft" string or *n* is zero.
    """
    if not isinstance(r_field, str) or "in = 1 ft" not in r_field:
        return None
    match = _RULER_PATTERN.search(r_field)
    if match is None:
        return None
    inches = float(match.group(1))
    if inches == 0:
        # "0 in = 1 ft" has no meaningful ratio and would divide by zero.
        return None
    # Round to the nearest integer; truncating would under-report the ratio
    # whenever the float quotient lands just below a whole number.
    ratio = round(12 / inches)
    return {
        "inches": inches,
        "scale": f"{inches}\" = 1'",
        "ratio": f"1:{ratio}",
        "object": key,
    }


def _collect_scale_data(objects_dict):
    """Scan qpdf objects and return (rulers, scale notes, slope notes).

    Only entries whose key starts with ``obj:`` and whose ``value`` is a dict
    are inspected. ``/R`` feeds the ruler list; ``/Contents`` feeds the scale
    list (text containing "SCALE:" and "=") or, failing that, the slope list
    (text containing "SLOPE" and ":").
    """
    scale_rulers = []
    scale_annotations = []
    slope_annotations = []
    for key, obj in objects_dict.items():
        if not (key.startswith("obj:") and isinstance(obj, dict)):
            continue
        value = obj.get("value")
        if not isinstance(value, dict):
            continue

        ruler = _parse_scale_ruler(value.get("/R"), key)
        if ruler is not None:
            scale_rulers.append(ruler)

        contents = value.get("/Contents")
        if not isinstance(contents, str):
            continue
        record = {"content": _strip_string_prefix(contents), "object": key}
        if "SCALE:" in contents and "=" in contents:
            scale_annotations.append(record)
        elif "SLOPE" in contents and ":" in contents:
            slope_annotations.append(record)
    return scale_rulers, scale_annotations, slope_annotations


def _print_annotation_section(title, annotations):
    """Print a section header, then each unique annotation with its count.

    Unique texts are listed in first-seen order, each followed by the number
    of objects that carry that exact text.
    """
    print(f"\n{title} ({len(annotations)} found):")
    print("-" * 40)
    counts = defaultdict(int)
    for annotation in annotations:
        counts[annotation["content"]] += 1
    for content, count in counts.items():
        print(f" {content}")
        print(f" (Found in {count} locations)")


def main():
    """Read the qpdf JSON file named in ``sys.argv[1]`` and print its scales.

    Prints the page count, the sorted scale ruler values, the unique drawing
    scale and slope annotations, and a summary. Exits with status 1 and a
    usage message when no file argument was given.

    Raises:
        OSError: If the JSON file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    if len(sys.argv) < 2:
        print("Usage: python extract_pdf_scales.py file.json")
        sys.exit(1)

    json_file = sys.argv[1]
    with open(json_file, "r", encoding="utf-8") as fh:
        data = json.load(fh)
    if not isinstance(data, dict):
        # Anything other than a JSON object cannot hold "pages" or "qpdf".
        data = {}

    print("=" * 60)
    print("PDF SCALE EXTRACTION RESULTS")
    print("=" * 60)

    # Get page information
    pages = data.get("pages", [])
    print(f"Total Pages: {len(pages)}")

    # The object table is expected as the second element of the "qpdf" list.
    qpdf_list = data.get("qpdf", [])
    if (
        not isinstance(qpdf_list, list)
        or len(qpdf_list) < 2
        or not isinstance(qpdf_list[1], dict)
    ):
        print("No object data found")
        return

    scale_rulers, scale_annotations, slope_annotations = _collect_scale_data(
        qpdf_list[1]
    )

    # Sort scale rulers by size
    scale_rulers.sort(key=lambda ruler: ruler["inches"])

    # Display results
    print(f"\n1. SCALE RULER VALUES ({len(scale_rulers)} found):")
    print("-" * 40)
    for i, scale in enumerate(scale_rulers, 1):
        print(f"{i:2d}. {scale['scale']:12} ({scale['ratio']:6}) - {scale['object']}")

    if scale_annotations:
        _print_annotation_section("2. DRAWING SCALE ANNOTATIONS", scale_annotations)
    if slope_annotations:
        _print_annotation_section("3. SLOPE ANNOTATIONS", slope_annotations)

    # Summary
    unique_scales = {item["content"] for item in scale_annotations}
    unique_slopes = {item["content"] for item in slope_annotations}
    print("\n" + "=" * 60)
    print("SUMMARY:")
    print(f"• {len(scale_rulers)} scale ruler increments (for measurements)")
    print(f"• {len(unique_scales)} unique drawing scales")
    print(f"• {len(unique_slopes)} unique slope annotations")
    print(f"• {len(pages)} total pages in PDF")
    print("=" * 60)
# Script entry point: run the extractor only when this file is executed
# directly, and hand main()'s return value to the interpreter as the exit
# status (None, the normal case, means success).
if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment