MadhukarMoogala · September 9, 2025 11:36
diff --git a/extract_pdf_scales.py b/extract_pdf_scales.py
 #!/usr/bin/env python3
 """

 extract_pdf_scales.py
 This tool extracts scale information from Bluebeam-calibrated PDF files. It identifies scale ruler values, drawing scale annotations, and slope annotations stored in the PDF's objects (the `/R` and `/Contents` entries exposed by qpdf's JSON export).
 qpdf: https://github.com/qpdf/qpdf

 Usage:
    python extract_pdf_scales.py file.json
    
 Author: Madhukar Moogala

 ## Prerequisites
 - Python 3.x installed
 - qpdf command-line tool installed
 - Bluebeam-calibrated PDF file

 ## Usage Instructions

 ### Step 1: Convert PDF to JSON
 Generate JSON data from your PDF using qpdf:
 ```bash
 qpdf --json --json-stream-data=inline your_file.pdf > file.json
 ```

 ### Step 2: Run the Python Script
 Extract scale information:
 ```bash
 python extract_pdf_scales.py file.json
 ```

 ## Output
 The script will display:
 1. **Scale Ruler Values** - All measurement scales available in Bluebeam (e.g., 1/4" = 1', 1:48 ratio)
 2. **Drawing Scale Annotations** - Actual scales used on specific drawings
 3. **Slope Annotations** - Roof slopes and construction details (e.g., SLOPE 4:12)
 4. **Summary** - Total counts and PDF information

 ## Example Output
 ```
 ============================================================
 PDF SCALE EXTRACTION RESULTS
 ============================================================
 Total Pages: 15

 1. SCALE RULER VALUES (15 found):
 ----------------------------------------
 1. 0.03125" = 1' (1:384 ) - obj:2863 0 R
 2. 0.0625" = 1' (1:192 ) - obj:2865 0 R
 3. 0.25" = 1'   (1:48  ) - obj:2861 0 R
 ...

 2. DRAWING SCALE ANNOTATIONS (9 found):
 ----------------------------------------
    SCALE: 1/2"=1'-0"
    (Found in 9 locations)

 3. SLOPE ANNOTATIONS (4 found):
 ----------------------------------------
    SLOPE 4:12
    (Found in 4 locations)
 ```
 """

 import json
 import sys
 import re
 from collections import defaultdict

 # Validate the command line only when run as a script, so that importing this
 # module (from a test or another tool) never terminates the importing process.
 if __name__ == "__main__" and len(sys.argv) < 2:
    print("Usage: python extract_pdf_scales.py file.json")
    sys.exit(1)


 # Matches ruler strings such as "0.25 in = 1 ft" and captures the inch value.
 _RULER_PATTERN = re.compile(r"(\d*\.?\d+)\s*in\s*=\s*1\s*ft")


 def _strip_string_prefix(text):
    """Remove a leading ``u:`` marker from *text*; any later ``u:`` is kept."""
    return text[2:] if text.startswith("u:") else text


 def _parse_ruler(r_field, key):
    """Build a ruler record from an ``/R`` value, or return None if unusable."""
    if not (isinstance(r_field, str) and "in = 1 ft" in r_field):
        return None
    match = _RULER_PATTERN.search(r_field)
    if not match:
        return None
    inches = float(match.group(1))
    if not inches:
        # "0 in = 1 ft" has no meaningful ratio and would divide by zero.
        return None
    return {
        "inches": inches,
        "scale": f"{inches}\" = 1'",
        # round() rather than int(): 12 / 0.1 is 119.999... and must print 1:120.
        "ratio": f"1:{round(12 / inches)}",
        "object": key,
    }


 def _print_annotation_section(heading, annotations):
    """Print a section: heading, rule, each distinct content once, total count."""
    print(f"\n{heading} ({len(annotations)} found):")
    print("-" * 40)
    # dict.fromkeys de-duplicates while keeping first-seen order.
    for content in dict.fromkeys(a["content"] for a in annotations):
        print(f"    {content}")
    print(f"    (Found in {len(annotations)} locations)")


 def main():
    """Print scale rulers, scale annotations and slope annotations from qpdf JSON.

    Reads the JSON file named by ``sys.argv[1]``, scans the object table at
    ``data["qpdf"][1]`` for ``/R`` ruler strings and ``/Contents`` annotation
    text, and writes a formatted report to standard output.

    Returns:
        None. Returns early after printing "No object data found" when the
        ``qpdf`` entry has no usable object table.

    Raises:
        IndexError: if no file name was supplied on the command line.
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    json_file = sys.argv[1]

    with open(json_file, "r", encoding="utf-8") as fh:
        data = json.load(fh)

    print("=" * 60)
    print("PDF SCALE EXTRACTION RESULTS")
    print("=" * 60)

    pages = data.get("pages", [])
    print(f"Total Pages: {len(pages)}")

    qpdf_list = data.get("qpdf", [])
    if len(qpdf_list) < 2 or not isinstance(qpdf_list[1], dict):
        print("No object data found")
        return

    objects_dict = qpdf_list[1]

    scale_rulers = []
    scale_annotations = []
    slope_annotations = []

    for key, obj in objects_dict.items():
        if not (key.startswith("obj:") and isinstance(obj, dict)):
            continue
        value = obj.get("value")
        if not isinstance(value, dict):
            continue

        ruler = _parse_ruler(value.get("/R"), key)
        if ruler is not None:
            scale_rulers.append(ruler)

        contents = value.get("/Contents")
        if not isinstance(contents, str):
            continue
        # Classify on the text without its "u:" marker; otherwise the marker's
        # own colon satisfies the SLOPE ratio check for every string.
        text = _strip_string_prefix(contents)
        record = {"content": text, "object": key}
        if "SCALE:" in text and "=" in text:
            scale_annotations.append(record)
        elif "SLOPE" in text and ":" in text:
            slope_annotations.append(record)

    # Smallest ruler increment first.
    scale_rulers.sort(key=lambda ruler: ruler["inches"])

    print(f"\n1. SCALE RULER VALUES ({len(scale_rulers)} found):")
    print("-" * 40)
    for i, ruler in enumerate(scale_rulers, 1):
        print(f"{i:2d}. {ruler['scale']:12} ({ruler['ratio']:6}) - {ruler['object']}")

    if scale_annotations:
        _print_annotation_section("2. DRAWING SCALE ANNOTATIONS", scale_annotations)

    if slope_annotations:
        _print_annotation_section("3. SLOPE ANNOTATIONS", slope_annotations)

    unique_scale_count = len({a["content"] for a in scale_annotations})
    unique_slope_count = len({a["content"] for a in slope_annotations})

    print("\n" + "=" * 60)
    print("SUMMARY:")
    print(f"• {len(scale_rulers)} scale ruler increments (for measurements)")
    print(f"• {unique_scale_count} unique drawing scales")
    print(f"• {unique_slope_count} unique slope annotations")
    print(f"• {len(pages)} total pages in PDF")
    print("=" * 60)


 # Run the extraction only when this file is executed as a script.
 if __name__ == "__main__":
    main()
	#!/usr/bin/env python3
	"""

	extract_pdf_scales.py
	This tool extracts scale information from Bluebeam-calibrated PDF files. It identifies scale ruler values, drawing scale annotations, and slope annotations stored in the PDF's objects (the `/R` and `/Contents` entries exposed by qpdf's JSON export).
	qpdf: https://github.com/qpdf/qpdf

	Usage:
	python extract_pdf_scales.py file.json

	Author: Madhukar Moogala

	## Prerequisites
	- Python 3.x installed
	- qpdf command-line tool installed
	- Bluebeam-calibrated PDF file

	## Usage Instructions

	### Step 1: Convert PDF to JSON
	Generate JSON data from your PDF using qpdf:
	```bash
	qpdf --json --json-stream-data=inline your_file.pdf > file.json
	```

	### Step 2: Run the Python Script
	Extract scale information:
	```bash
	python extract_pdf_scales.py file.json
	```

	## Output
	The script will display:
	1. Scale Ruler Values - All measurement scales available in Bluebeam (e.g., 1/4" = 1', 1:48 ratio)
	2. Drawing Scale Annotations - Actual scales used on specific drawings
	3. Slope Annotations - Roof slopes and construction details (e.g., SLOPE 4:12)
	4. Summary - Total counts and PDF information

	## Example Output
	```
	============================================================
	PDF SCALE EXTRACTION RESULTS
	============================================================
	Total Pages: 15

	1. SCALE RULER VALUES (15 found):
	----------------------------------------
	1. 0.03125" = 1' (1:384 ) - obj:2863 0 R
	2. 0.0625" = 1' (1:192 ) - obj:2865 0 R
	3. 0.25" = 1' (1:48 ) - obj:2861 0 R
	...

	2. DRAWING SCALE ANNOTATIONS (9 found):
	----------------------------------------
	SCALE: 1/2"=1'-0"
	(Found in 9 locations)

	3. SLOPE ANNOTATIONS (4 found):
	----------------------------------------
	SLOPE 4:12
	(Found in 4 locations)
	```
	"""

	import json
	import sys
	import re
	from collections import defaultdict

	# Validate the command line only when run as a script, so that importing this
	# module (from a test or another tool) never terminates the importing process.
	if __name__ == "__main__" and len(sys.argv) < 2:
	    print("Usage: python extract_pdf_scales.py file.json")
	    sys.exit(1)


	# Matches ruler strings such as "0.25 in = 1 ft" and captures the inch value.
	# The "*" quantifiers are required: without them whole-inch values such as
	# "3 in = 1 ft" and any spacing other than a single space fail to match.
	_RULER_PATTERN = re.compile(r"(\d*\.?\d+)\s*in\s*=\s*1\s*ft")


	def _strip_string_prefix(text):
	    """Remove a leading ``u:`` marker from *text*; any later ``u:`` is kept."""
	    return text[2:] if text.startswith("u:") else text


	def _parse_ruler(r_field, key):
	    """Build a ruler record from an ``/R`` value, or return None if unusable."""
	    if not (isinstance(r_field, str) and "in = 1 ft" in r_field):
	        return None
	    match = _RULER_PATTERN.search(r_field)
	    if not match:
	        return None
	    inches = float(match.group(1))
	    if not inches:
	        # "0 in = 1 ft" has no meaningful ratio and would divide by zero.
	        return None
	    return {
	        "inches": inches,
	        "scale": f"{inches}\" = 1'",
	        # round() rather than int(): 12 / 0.1 is 119.999... and must print 1:120.
	        "ratio": f"1:{round(12 / inches)}",
	        "object": key,
	    }


	def _print_annotation_section(heading, annotations):
	    """Print a section: heading, rule, each distinct content once, total count."""
	    print(f"\n{heading} ({len(annotations)} found):")
	    print("-" * 40)
	    # dict.fromkeys de-duplicates while keeping first-seen order.
	    for content in dict.fromkeys(a["content"] for a in annotations):
	        print(f" {content}")
	    print(f" (Found in {len(annotations)} locations)")


	def main():
	    """Print scale rulers, scale annotations and slope annotations from qpdf JSON.

	    Reads the JSON file named by ``sys.argv[1]``, scans the object table at
	    ``data["qpdf"][1]`` for ``/R`` ruler strings and ``/Contents`` annotation
	    text, and writes a formatted report to standard output.

	    Returns:
	        None. Returns early after printing "No object data found" when the
	        ``qpdf`` entry has no usable object table.

	    Raises:
	        IndexError: if no file name was supplied on the command line.
	        OSError: if the file cannot be opened.
	        json.JSONDecodeError: if the file is not valid JSON.
	    """
	    json_file = sys.argv[1]

	    with open(json_file, "r", encoding="utf-8") as fh:
	        data = json.load(fh)

	    print("=" * 60)
	    print("PDF SCALE EXTRACTION RESULTS")
	    print("=" * 60)

	    pages = data.get("pages", [])
	    print(f"Total Pages: {len(pages)}")

	    qpdf_list = data.get("qpdf", [])
	    if len(qpdf_list) < 2 or not isinstance(qpdf_list[1], dict):
	        print("No object data found")
	        return

	    objects_dict = qpdf_list[1]

	    scale_rulers = []
	    scale_annotations = []
	    slope_annotations = []

	    for key, obj in objects_dict.items():
	        if not (key.startswith("obj:") and isinstance(obj, dict)):
	            continue
	        value = obj.get("value")
	        if not isinstance(value, dict):
	            continue

	        ruler = _parse_ruler(value.get("/R"), key)
	        if ruler is not None:
	            scale_rulers.append(ruler)

	        contents = value.get("/Contents")
	        if not isinstance(contents, str):
	            continue
	        # Classify on the text without its "u:" marker; otherwise the marker's
	        # own colon satisfies the SLOPE ratio check for every string.
	        text = _strip_string_prefix(contents)
	        record = {"content": text, "object": key}
	        if "SCALE:" in text and "=" in text:
	            scale_annotations.append(record)
	        elif "SLOPE" in text and ":" in text:
	            slope_annotations.append(record)

	    # Smallest ruler increment first.
	    scale_rulers.sort(key=lambda ruler: ruler["inches"])

	    print(f"\n1. SCALE RULER VALUES ({len(scale_rulers)} found):")
	    print("-" * 40)
	    for i, ruler in enumerate(scale_rulers, 1):
	        print(f"{i:2d}. {ruler['scale']:12} ({ruler['ratio']:6}) - {ruler['object']}")

	    if scale_annotations:
	        _print_annotation_section("2. DRAWING SCALE ANNOTATIONS", scale_annotations)

	    if slope_annotations:
	        _print_annotation_section("3. SLOPE ANNOTATIONS", slope_annotations)

	    unique_scale_count = len({a["content"] for a in scale_annotations})
	    unique_slope_count = len({a["content"] for a in slope_annotations})

	    print("\n" + "=" * 60)
	    print("SUMMARY:")
	    print(f"• {len(scale_rulers)} scale ruler increments (for measurements)")
	    print(f"• {unique_scale_count} unique drawing scales")
	    print(f"• {unique_slope_count} unique slope annotations")
	    print(f"• {len(pages)} total pages in PDF")
	    print("=" * 60)


	# Run the extraction only when this file is executed as a script.
	if __name__ == "__main__":
	    main()
No results found