Created
July 20, 2024 04:35
-
-
Save OnceUponALoop/a346aa9bb40ed1183d428ee753334c2d to your computer and use it in GitHub Desktop.
This script provides functions to encode and decode PlantUML text. The encoding process compresses the text using zlib and then encodes it using a custom base64-like encoding suitable for PlantUML servers. The decoding process reverses these steps. It also allows extracting and decoding PlantUML text from SVG comments.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
""" | |
PlantUML Encoder/Decoder | |
======================== | |
This script provides functions to encode and decode PlantUML text. The encoding | |
process compresses the text using zlib and then encodes it using a custom | |
base64-like encoding suitable for PlantUML servers. The decoding process | |
reverses these steps. It also allows extracting and decoding PlantUML text | |
from SVG comments. | |
Usage: | |
# To encode PlantUML text | |
python plantuml_codec.py -e "your_plantuml_text_here" | |
# To decode encoded PlantUML text | |
python plantuml_codec.py -d "your_encoded_plantuml_text_here" | |
# To extract and decode PlantUML text from an SVG file | |
python plantuml_codec.py -s "your_svg_file_path" | |
Author: | |
Firas AlShafei <[email protected]> | |
Version: | |
1.0.0 | |
""" | |
import zlib | |
import base64 | |
import string | |
import sys | |
import argparse | |
import re | |
__author__ = "Firas AlShafei" | |
__email__ = "[email protected]" | |
__version__ = "1.0.0" | |
# The two 64-symbol alphabets hold the same kinds of characters in a different
# order: PlantUML uses 0-9, A-Z, a-z, '-', '_' while standard base64 uses
# A-Z, a-z, 0-9, '+', '/'.
plantuml_alphabet = ''.join((string.digits, string.ascii_uppercase, string.ascii_lowercase, '-_'))
base64_alphabet = ''.join((string.ascii_uppercase, string.ascii_lowercase, string.digits, '+/'))

# Byte-for-byte translation tables between the two alphabets. Every symbol is
# plain ASCII, so the ASCII and UTF-8 encodings of the alphabets are identical.
b64_to_plantuml = bytes.maketrans(base64_alphabet.encode('ascii'), plantuml_alphabet.encode('ascii'))
plantuml_to_b64 = bytes.maketrans(plantuml_alphabet.encode('ascii'), base64_alphabet.encode('ascii'))
def encode6bit(b):
    """
    Map a 6-bit value onto its character in the PlantUML alphabet.

    The alphabet runs 0-9, A-Z, a-z, '-', '_' for the values 0 through 63.

    Parameters:
        b (int): The 6-bit value to encode.

    Returns:
        str: The encoded character, or '?' when the value is 64 or greater.
    """
    # Walk the three contiguous character ranges; each entry is
    # (number of symbols in the range, code point of its first symbol).
    for span, first_code in ((10, 48), (26, 65), (26, 97)):
        if b < span:
            return chr(first_code + b)
        b -= span
    # What is left is the offset past 'z': 0 -> '-', 1 -> '_', anything else
    # is outside the alphabet.
    return {0: '-', 1: '_'}.get(b, '?')
def append3bytes(b1, b2, b3):
    """
    Turn three bytes (24 bits) into four PlantUML-alphabet characters.

    Parameters:
        b1 (int): The first byte.
        b2 (int): The second byte.
        b3 (int): The third byte.

    Returns:
        str: The four encoded characters, most significant 6 bits first.
    """
    # Slice the 24 bits into four consecutive 6-bit groups.
    sextets = (
        b1 >> 2,                        # top 6 bits of b1
        ((b1 & 0x3) << 4) | (b2 >> 4),  # low 2 bits of b1 + top 4 bits of b2
        ((b2 & 0xF) << 2) | (b3 >> 6),  # low 4 bits of b2 + top 2 bits of b3
        b3,                             # low 6 bits of b3 (masked below)
    )
    return ''.join(encode6bit(value & 0x3F) for value in sextets)
def custom_base64_encoder(data):
    """
    Encode binary data with the PlantUML base64-like scheme.

    The data is consumed three bytes at a time and each group becomes four
    characters. A trailing group of one or two bytes is completed with zero
    bytes, so the output length is always a multiple of four and no '='
    padding is produced.

    Parameters:
        data (bytes): The deflated data to encode.

    Returns:
        str: The custom base64-like encoded string.
    """
    size = len(data)
    pieces = []
    for start in range(0, size, 3):
        first = data[start]
        # Substitute zero for bytes that would fall past the end of the data.
        second = data[start + 1] if start + 1 < size else 0
        third = data[start + 2] if start + 2 < size else 0
        pieces.append(append3bytes(first, second, third))
    return ''.join(pieces)
def deflate(data):
    """
    Compress text into a raw deflate stream at the highest compression level.

    The result carries neither the 2-byte zlib header nor the 4-byte Adler-32
    trailer.

    Parameters:
        data (str): The input string to compress; it is encoded as UTF-8.

    Returns:
        bytes: The raw deflated data.
    """
    payload = data.encode('utf-8')
    # A negative window size asks zlib for a bare deflate stream, i.e. the
    # zlib container without its header and checksum.
    compressor = zlib.compressobj(9, zlib.DEFLATED, -zlib.MAX_WBITS)
    return compressor.compress(payload) + compressor.flush()
def encode(puml):
    """
    Produce the PlantUML-server encoding of a PlantUML source string.

    The text is deflated first and the compressed bytes are then written
    with the custom base64-like alphabet.

    Parameters:
        puml (str): The PlantUML string to encode.

    Returns:
        str: The encoded string.
    """
    return custom_base64_encoder(deflate(puml))
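# NOTE: The commented-out variant below is close, but not quite right. Its
# logic is almost identical to encode(), but base64.b64encode pads the final
# group with '=' characters (which the translation table leaves untouched),
# whereas the expected PlantUML output pads with zero bits ('0' characters):
#
# > python encode.py -d 'SyfFKj2rKt3CoKnELR1Io4ZDoSa70000'
# Bob -> Alice : hello
#
# > python plantuml_codec.py -e 'Bob -> Alice : hello'
# SyfFKj2rKt3CoKnELR1Io4ZDoSa700==
#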
# def deflate_and_encode(plantuml_text): | |
# """ | |
# Compress the PlantUML text using zlib and encode it for the PlantUML server. | |
# Parameters: | |
# plantuml_text (str): The PlantUML text to encode. | |
# Returns: | |
# str: The encoded string. | |
# """ | |
# zlibbed_str = zlib.compress(plantuml_text.encode('utf-8')) | |
# compressed_string = zlibbed_str[2:-4] | |
# encoded_bytes = base64.b64encode(compressed_string).translate(b64_to_plantuml) | |
# return encoded_bytes.decode('utf-8') | |
def decode(encoded_text):
    """
    Decode a PlantUML-encoded string back into PlantUML source text.

    The text is mapped from the PlantUML alphabet to the standard base64
    alphabet, base64-decoded, and finally inflated as a raw deflate stream.
    Leading/trailing whitespace is ignored and missing '=' padding is
    tolerated, so encoded strings whose length is not a multiple of four
    can still be decoded.

    Parameters:
        encoded_text (str): The base64 encoded, deflated string.

    Returns:
        str: The decoded and decompressed data.

    Raises:
        RuntimeError: If translating, base64-decoding, inflating or UTF-8
            decoding fails. The underlying exception is chained as the cause.
    """
    try:
        # Translate the PlantUML encoding to standard base64
        standard_b64 = encoded_text.strip().encode('utf-8').translate(plantuml_to_b64)
        # b64decode rejects input whose length is not a multiple of four;
        # restore any '=' padding that the encoded text left out.
        standard_b64 += b'=' * (-len(standard_b64) % 4)
        # Decode the base64
        decoded_data = base64.b64decode(standard_b64)
        # Inflate the decoded data; negative wbits selects a raw deflate
        # stream without zlib header or checksum.
        inflated_data = zlib.decompress(decoded_data, -zlib.MAX_WBITS)
        return inflated_data.decode('utf-8')
    except Exception as e:
        # Chain the original exception so the root cause stays visible.
        raise RuntimeError(f"Error decoding PlantUML text: {e}") from e
def extract_and_decode_from_svg(svg_file):
    """
    Extract the encoded PlantUML text from the SVG file and decode it.

    The file is searched for a comment of the form ``<!--SRC=[...]-->`` and
    the text between the brackets is passed to ``decode``.

    Parameters:
        svg_file (str): The path to the SVG file.

    Returns:
        str: The decoded PlantUML text.

    Raises:
        RuntimeError: If the file cannot be read, contains no matching
            comment, or the embedded text cannot be decoded. The underlying
            exception is chained as the cause.
    """
    try:
        with open(svg_file, 'r', encoding='utf-8') as file:
            svg_content = file.read()
        # Extract the encoded PlantUML text from the SVG comment
        match = re.search(r'<!--SRC=\[([A-Za-z0-9\-_]+)\]-->', svg_content)
        if not match:
            raise ValueError("No encoded PlantUML text found in SVG file.")
        # Decode the extracted text with this module's decoder. The previous
        # code called an undefined name here, so every match failed.
        return decode(match.group(1))
    except Exception as e:
        raise RuntimeError(f"Error extracting and decoding PlantUML text from SVG: {e}") from e
def main():
    """
    Main function to encode or decode the provided PlantUML text based on command-line flags.

    Exactly one of -e/--encode, -d/--decode or -s/--svg must be given together
    with a positional text argument. The result is printed to standard output.
    If the operation fails, the error is printed to standard error and the
    process exits with status 1, so callers and shell pipelines can tell a
    failure apart from a result.
    """
    # Set up argument parser
    parser = argparse.ArgumentParser(description="Encode or decode PlantUML text, or extract and decode from SVG.")
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-e', '--encode', help="Encode the provided PlantUML text", action='store_true')
    group.add_argument('-d', '--decode', help="Decode the provided encoded PlantUML text", action='store_true')
    group.add_argument('-s', '--svg', help="Extract and decode PlantUML text from the provided SVG file", action='store_true')
    parser.add_argument('text', help="The PlantUML text to encode, the encoded text to decode, or the SVG file path")
    # Parse arguments
    args = parser.parse_args()
    # Perform the requested action
    try:
        if args.encode:
            result = encode(args.text)
        elif args.decode:
            result = decode(args.text)
        else:
            # The group is required and mutually exclusive, so this is --svg.
            result = extract_and_decode_from_svg(args.text)
        print(result)
    except Exception as e:
        # Keep diagnostics out of stdout and signal the failure via exit code.
        print(f"Operation failed: {e}", file=sys.stderr)
        sys.exit(1)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Author
OnceUponALoop
commented
Jul 20, 2024
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment