Created
September 9, 2025 11:36
-
-
Save MadhukarMoogala/ad75c317f720844a5ee172f54a4a1545 to your computer and use it in GitHub Desktop.
This tool extracts scale information from Bluebeam-calibrated PDF files. It identifies scale ruler values, drawing scale annotations, and slope annotations embedded in the PDF metadata.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
extract_pdf_scales.py | |
This tool extracts scale information from Bluebeam-calibrated PDF files. It identifies scale ruler values, drawing scale annotations, and slope annotations embedded in the PDF metadata. | |
qpdf: https://github.com/qpdf/qpdf | |
Usage: | |
python extract_pdf_scales.py file.json | |
Author: Madhukar Moogala | |
## Prerequisites | |
- Python 3.x installed | |
- qpdf command-line tool installed | |
- Bluebeam-calibrated PDF file | |
## Usage Instructions | |
### Step 1: Convert PDF to JSON | |
Generate JSON data from your PDF using qpdf: | |
```bash | |
qpdf --json --json-stream-data=inline your_file.pdf > file.json | |
``` | |
### Step 2: Run the Python Script | |
Extract scale information: | |
```bash | |
python extract_pdf_scales.py file.json | |
``` | |
## Output | |
The script will display: | |
1. **Scale Ruler Values** - All measurement scales available in Bluebeam (e.g., 1/4" = 1', 1:48 ratio) | |
2. **Drawing Scale Annotations** - Actual scales used on specific drawings | |
3. **Slope Annotations** - Roof slopes and construction details (e.g., SLOPE 4:12) | |
4. **Summary** - Total counts and PDF information | |
## Example Output | |
``` | |
============================================================ | |
PDF SCALE EXTRACTION RESULTS | |
============================================================ | |
Total Pages: 15 | |
1. SCALE RULER VALUES (15 found): | |
---------------------------------------- | |
1. 0.03125" = 1' (1:384 ) - obj:2863 0 R | |
2. 0.0625" = 1' (1:192 ) - obj:2865 0 R | |
3. 0.25" = 1' (1:48 ) - obj:2861 0 R | |
... | |
2. DRAWING SCALE ANNOTATIONS (9 found): | |
---------------------------------------- | |
SCALE: 1/2"=1'-0" | |
(Found in 9 locations) | |
3. SLOPE ANNOTATIONS (4 found): | |
---------------------------------------- | |
SLOPE 4:12 | |
(Found in 4 locations) | |
``` | |
""" | |
import json | |
import sys | |
import re | |
from collections import defaultdict | |
# Refuse to go any further unless a JSON input path was given on the
# command line; show the expected invocation and exit with status 1.
if not sys.argv[1:]:
    print("Usage: python extract_pdf_scales.py file.json")
    raise SystemExit(1)
# Matches ruler strings such as "0.25 in = 1 ft"; compiled once, used per object.
_RULER_RE = re.compile(r"(\d*\.?\d+)\s*in\s*=\s*1\s*ft")


def _strip_qpdf_prefix(text):
    """Remove only the leading ``u:`` marker that qpdf JSON puts on Unicode strings."""
    # A blanket replace() would also eat "u:" inside the text (e.g. "menu:").
    return text[2:] if text.startswith("u:") else text


def _format_ratio(inches):
    """Return the ``1:N`` ratio string for *inches* on paper per real-world foot."""
    ratio = 12 / inches
    nearest = round(ratio)
    # Snap float noise (47.9999999...) to the whole number it stands for.
    if abs(ratio - nearest) < 1e-6:
        return f"1:{nearest}"
    # Keep genuinely fractional ratios visible instead of truncating them.
    return f"1:{ratio:.2f}".rstrip("0").rstrip(".")


def _parse_ruler(r_field, key):
    """Build a ruler record from an ``/R`` string, or return None if unusable."""
    match = _RULER_RE.search(r_field)
    if match is None:
        return None
    inches = float(match.group(1))
    if inches == 0:
        # "0 in = 1 ft" has no meaningful ratio and would divide by zero.
        return None
    return {
        "inches": inches,
        "scale": f"{inches}\" = 1'",
        "ratio": _format_ratio(inches),
        "object": key,
    }


def _print_unique_annotations(heading, annotations):
    """Print a numbered section listing each distinct annotation text once."""
    print(f"\n{heading} ({len(annotations)} found):")
    print("-" * 40)
    # dict.fromkeys de-duplicates while keeping first-seen order.
    for content in dict.fromkeys(a["content"] for a in annotations):
        print(f" {content}")
    print(f" (Found in {len(annotations)} locations)")


def main():
    """Report scale rulers, drawing scales and slope notes from a qpdf JSON dump.

    Reads the JSON file named by ``sys.argv[1]``, scans every ``obj:`` entry in
    the second element of the top-level ``"qpdf"`` array, and prints:

    1. ruler values parsed from ``/R`` strings of the form ``<n> in = 1 ft``,
       sorted by the inch value;
    2. distinct ``/Contents`` strings containing ``SCALE:`` and ``=``;
    3. distinct ``/Contents`` strings containing ``SLOPE`` and ``:``;
    4. a summary with the counts and the number of pages.

    Returns None. Errors from opening or decoding the file propagate to the
    caller unchanged.
    """
    json_file = sys.argv[1]
    with open(json_file, "r", encoding="utf-8") as fh:
        data = json.load(fh)

    print("=" * 60)
    print("PDF SCALE EXTRACTION RESULTS")
    print("=" * 60)

    # Get page information
    pages = data.get("pages", [])
    print(f"Total Pages: {len(pages)}")

    # The object table is expected as the second element of the "qpdf" array.
    qpdf_list = data.get("qpdf", [])
    if len(qpdf_list) < 2 or not isinstance(qpdf_list[1], dict):
        print("No object data found")
        return
    objects_dict = qpdf_list[1]

    scale_rulers = []
    scale_annotations = []
    slope_annotations = []

    for key, obj in objects_dict.items():
        if not key.startswith("obj:") or not isinstance(obj, dict):
            continue
        value = obj.get("value")
        if not isinstance(value, dict):
            continue

        # Scale ruler values (/R fields)
        r_field = value.get("/R")
        if isinstance(r_field, str):
            ruler = _parse_ruler(r_field, key)
            if ruler is not None:
                scale_rulers.append(ruler)

        # Scale and slope annotations (/Contents fields)
        contents = value.get("/Contents")
        if isinstance(contents, str):
            if "SCALE:" in contents and "=" in contents:
                scale_annotations.append(
                    {"content": _strip_qpdf_prefix(contents), "object": key}
                )
            elif "SLOPE" in contents and ":" in contents:
                slope_annotations.append(
                    {"content": _strip_qpdf_prefix(contents), "object": key}
                )

    # Sort scale rulers by size
    scale_rulers.sort(key=lambda ruler: ruler["inches"])

    # Display results
    print(f"\n1. SCALE RULER VALUES ({len(scale_rulers)} found):")
    print("-" * 40)
    for i, scale in enumerate(scale_rulers, 1):
        print(f"{i:2d}. {scale['scale']:12} ({scale['ratio']:6}) - {scale['object']}")

    if scale_annotations:
        _print_unique_annotations("2. DRAWING SCALE ANNOTATIONS", scale_annotations)
    if slope_annotations:
        _print_unique_annotations("3. SLOPE ANNOTATIONS", slope_annotations)

    # Summary
    unique_scales = {s["content"] for s in scale_annotations}
    unique_slopes = {s["content"] for s in slope_annotations}
    print("\n" + "=" * 60)
    print("SUMMARY:")
    print(f"• {len(scale_rulers)} scale ruler increments (for measurements)")
    print(f"• {len(unique_scales)} unique drawing scales")
    print(f"• {len(unique_slopes)} unique slope annotations")
    print(f"• {len(pages)} total pages in PDF")
    print("=" * 60)
# Script entry point: call main() only when this file is executed directly,
# so that importing the module does not start the extraction.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment