Skip to content

Instantly share code, notes, and snippets.

@OnceUponALoop
Created July 20, 2024 04:35
Show Gist options
  • Save OnceUponALoop/a346aa9bb40ed1183d428ee753334c2d to your computer and use it in GitHub Desktop.
Save OnceUponALoop/a346aa9bb40ed1183d428ee753334c2d to your computer and use it in GitHub Desktop.
This script provides functions to encode and decode PlantUML text. The encoding process compresses the text using zlib and then encodes it using a custom base64-like encoding suitable for PlantUML servers. The decoding process reverses these steps. It also allows extracting and decoding PlantUML text from SVG comments.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
PlantUML Encoder/Decoder
========================
This script provides functions to encode and decode PlantUML text. The encoding
process compresses the text using zlib and then encodes it using a custom
base64-like encoding suitable for PlantUML servers. The decoding process
reverses these steps. It also allows extracting and decoding PlantUML text
from SVG comments.
Usage:
# To encode PlantUML text
python plantuml_codec.py -e "your_plantuml_text_here"
# To decode encoded PlantUML text
python plantuml_codec.py -d "your_encoded_plantuml_text_here"
# To extract and decode PlantUML text from an SVG file
python plantuml_codec.py -s "your_svg_file_path"
Author:
Firas AlShafei <[email protected]>
Version:
1.0.0
"""
import zlib
import base64
import string
import sys
import argparse
import re
__author__ = "Firas AlShafei"
__email__ = "[email protected]"
__version__ = "1.0.0"

# Both alphabets hold the same 64 symbols classes in a different order:
# PlantUML starts with the digits and ends with '-_', while standard base64
# starts with the uppercase letters and ends with '+/'.
plantuml_alphabet = ''.join(
    (string.digits, string.ascii_uppercase, string.ascii_lowercase, '-_')
)
base64_alphabet = ''.join(
    (string.ascii_uppercase, string.ascii_lowercase, string.digits, '+/')
)

# Byte-level translation tables that map a symbol of one alphabet to the
# symbol at the same index of the other alphabet (for use with bytes.translate).
b64_to_plantuml = bytes.maketrans(
    base64_alphabet.encode('utf-8'),
    plantuml_alphabet.encode('utf-8'),
)
plantuml_to_b64 = bytes.maketrans(
    plantuml_alphabet.encode('utf-8'),
    base64_alphabet.encode('utf-8'),
)
def encode6bit(b):
    """
    Map a 6-bit value onto one character of the PlantUML alphabet.

    The mapping is 0-9 -> '0'-'9', 10-35 -> 'A'-'Z', 36-61 -> 'a'-'z',
    62 -> '-' and 63 -> '_'.

    Parameters:
        b (int): The 6-bit value to encode.

    Returns:
        str: The encoded character, or '?' for a value above 63.
    """
    if b < 10:
        return chr(ord('0') + b)
    if b < 36:
        return chr(ord('A') + (b - 10))
    if b < 62:
        return chr(ord('a') + (b - 36))
    # Only the two punctuation symbols remain; anything else is out of range.
    return {62: '-', 63: '_'}.get(b, '?')
def append3bytes(b1, b2, b3):
    """
    Turn three bytes (24 bits) into four PlantUML characters (4 x 6 bits).

    Parameters:
        b1 (int): The first byte.
        b2 (int): The second byte.
        b3 (int): The third byte.

    Returns:
        str: The four encoded characters.
    """
    sextets = (
        b1 >> 2,                        # top 6 bits of b1
        ((b1 & 0x3) << 4) | (b2 >> 4),  # low 2 bits of b1 + top 4 bits of b2
        ((b2 & 0xF) << 2) | (b3 >> 6),  # low 4 bits of b2 + top 2 bits of b3
        b3,                             # low 6 bits of b3 (masked below)
    )
    # The mask keeps every value within 6 bits before it is looked up.
    return ''.join(encode6bit(sextet & 0x3F) for sextet in sextets)
def custom_base64_encoder(data):
    """
    Encode bytes with the PlantUML base64-like scheme.

    The input is consumed three bytes at a time. A short final group is
    padded with zero bytes, so the output length is always a multiple of
    four characters and no '=' padding is produced.

    Parameters:
        data (bytes): The deflated data to encode.

    Returns:
        str: The custom base64-like encoded string.
    """
    pieces = []
    for start in range(0, len(data), 3):
        group = list(data[start:start + 3])
        # Fill a trailing group of one or two bytes up to three with zeros.
        group += [0] * (3 - len(group))
        pieces.append(append3bytes(group[0], group[1], group[2]))
    return ''.join(pieces)
def deflate(data):
    """
    Compress a string with zlib at the highest compression level and return
    the result without its first two and last four bytes.

    Parameters:
        data (str): The input string to compress; it is UTF-8 encoded first.

    Returns:
        bytes: The deflated data.
    """
    header_len = 2   # leading bytes dropped (zlib stream header)
    trailer_len = 4  # trailing bytes dropped (zlib checksum trailer)
    zlib_stream = zlib.compress(data.encode('utf-8'), level=9)
    return zlib_stream[header_len:len(zlib_stream) - trailer_len]
def encode(puml):
    """
    Produce the PlantUML text encoding of a diagram source.

    The source is deflated first and the compressed bytes are then written
    with the custom base64-like alphabet.

    Parameters:
        puml (str): The PlantUML string to encode.

    Returns:
        str: The encoded string.
    """
    return custom_base64_encoder(deflate(puml))
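# Alternative encoder, kept for reference only. It is close, but not equivalent:
# base64.b64encode pads a short final group with '=' characters, whereas the
# PlantUML encoder pads the input with zero bytes, which encode as '0'.
# Compare the trailing '0000' of the reference string below with the '00=='
# produced by this version.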
#
# > python encode.py -d 'SyfFKj2rKt3CoKnELR1Io4ZDoSa70000'
# Bob -> Alice : hello
#
# > python plantuml_codec.py -e 'Bob -> Alice : hello'
# SyfFKj2rKt3CoKnELR1Io4ZDoSa700==
#
# def deflate_and_encode(plantuml_text):
# """
# Compress the PlantUML text using zlib and encode it for the PlantUML server.
# Parameters:
# plantuml_text (str): The PlantUML text to encode.
# Returns:
# str: The encoded string.
# """
# zlibbed_str = zlib.compress(plantuml_text.encode('utf-8'))
# compressed_string = zlibbed_str[2:-4]
# encoded_bytes = base64.b64encode(compressed_string).translate(b64_to_plantuml)
# return encoded_bytes.decode('utf-8')
def decode(encoded_text):
    """
    Decode a PlantUML-encoded (custom base64, raw deflate) string.

    Whitespace anywhere in the input is ignored, and input whose length is
    not a multiple of four characters is padded with '=' before base64
    decoding, so strings that lost their trailing padding still decode.

    Parameters:
        encoded_text (str): The PlantUML-encoded, deflated string.

    Returns:
        str: The decoded and decompressed data.

    Raises:
        RuntimeError: If the text cannot be translated, base64-decoded,
            inflated or interpreted as UTF-8. The original exception is
            attached as the cause.
    """
    try:
        # Drop all whitespace (e.g. a trailing newline from a shell pipe) so
        # the padding computed below is based on real symbols only.
        compact = ''.join(encoded_text.split())
        # Translate the PlantUML alphabet to the standard base64 alphabet
        standard_b64 = compact.encode('utf-8').translate(plantuml_to_b64)
        # base64.b64decode requires a length that is a multiple of four
        standard_b64 += b'=' * (-len(standard_b64) % 4)
        decoded_data = base64.b64decode(standard_b64)
        # Negative wbits selects a raw deflate stream (no zlib header/trailer)
        inflated_data = zlib.decompress(decoded_data, -zlib.MAX_WBITS)
        return inflated_data.decode('utf-8')
    except Exception as e:
        raise RuntimeError(f"Error decoding PlantUML text: {e}") from e
def extract_and_decode_from_svg(svg_file):
    """
    Extract the encoded PlantUML text from an SVG file and decode it.

    The file is searched for a comment of the form ``<!--SRC=[...]-->``; the
    text between the brackets is passed to ``decode``.

    Parameters:
        svg_file (str): The path to the SVG file.

    Returns:
        str: The decoded PlantUML text.

    Raises:
        RuntimeError: If the file cannot be read, contains no ``SRC``
            comment, or the embedded text cannot be decoded. The original
            exception is attached as the cause.
    """
    try:
        with open(svg_file, 'r', encoding='utf-8') as file:
            svg_content = file.read()
        # Extract the encoded PlantUML text from the SVG comment
        match = re.search(r'<!--SRC=\[([A-Za-z0-9\-_]+)\]-->', svg_content)
        if not match:
            raise ValueError("No encoded PlantUML text found in SVG file.")
        # Decode with the module's decoder (the previous call targeted a
        # function name that does not exist in this file).
        return decode(match.group(1))
    except Exception as e:
        raise RuntimeError(f"Error extracting and decoding PlantUML text from SVG: {e}") from e
def main():
    """
    Command-line entry point: encode, decode, or extract-and-decode the
    positional argument according to the selected flag.

    Exactly one of -e/--encode, -d/--decode or -s/--svg must be given. The
    result is printed to standard output. On failure the error message is
    written to standard error and the process exits with status 1, so shell
    pipelines and scripts can detect the failure.
    """
    # Set up argument parser
    parser = argparse.ArgumentParser(description="Encode or decode PlantUML text, or extract and decode from SVG.")
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-e', '--encode', help="Encode the provided PlantUML text", action='store_true')
    group.add_argument('-d', '--decode', help="Decode the provided encoded PlantUML text", action='store_true')
    group.add_argument('-s', '--svg', help="Extract and decode PlantUML text from the provided SVG file", action='store_true')
    parser.add_argument('text', help="The PlantUML text to encode, the encoded text to decode, or the SVG file path")

    # Parse arguments
    args = parser.parse_args()

    # Perform the requested action
    try:
        if args.encode:
            result = encode(args.text)
        elif args.decode:
            result = decode(args.text)
        else:
            # The group is required, so the only remaining option is --svg.
            result = extract_and_decode_from_svg(args.text)
        print(result)
    except Exception as e:
        # Report on stderr and signal failure through the exit status.
        print(f"Operation failed: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
@OnceUponALoop
Copy link
Author

usage: plantuml_codec.py [-h] (-e | -d | -s) text

Encode or decode PlantUML text, or extract and decode from SVG.

positional arguments:
  text          The PlantUML text to encode, the encoded text to decode, or the SVG file path

options:
  -h, --help    show this help message and exit
  -e, --encode  Encode the provided PlantUML text
  -d, --decode  Decode the provided encoded PlantUML text
  -s, --svg     Extract and decode PlantUML text from the provided SVG file

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment