dchaplinsky · February 7, 2025 10:49
diff --git a/README.md b/README.md
diff --git a/markdown_to_png.py b/markdown_to_png.py
 import markdown
 import imgkit
 from typing import Optional, Dict
 import os
 from markdown.inlinepatterns import ImageInlineProcessor, IMAGE_LINK_RE
 from markdown.extensions import Extension

 # Default styling configuration.
 # These module-level values are the defaults for the styling parameters of
 # get_default_css(), create_html() and convert_markdown_to_png().
 DEFAULT_FONT_FAMILY = "Arial"  # preferred CSS font family ("sans-serif" is appended as fallback)
 DEFAULT_FONT_SIZE = 14  # base font size; emitted into the CSS with a "px" suffix
 DEFAULT_WIDTH = 800  # width of the <body> element; emitted with a "px" suffix
 DEFAULT_BG_COLOR = "white"  # CSS background-color of the page
 DEFAULT_TEXT_COLOR = "black"  # CSS color of the body text


 class ImagePathProcessor(ImageInlineProcessor):
    """
    Inline image processor that makes relative image sources absolute.

    All markdown image syntax is handled by the parent ``ImageInlineProcessor``;
    this subclass only post-processes the resulting ``<img>`` element and
    resolves a relative ``src`` against ``base_dir``.
    """

    # Lower-case prefixes of sources that must NOT be treated as relative
    # filesystem paths: remote URLs, root-relative/absolute paths, inline
    # ``data:`` URIs and ``file:`` URLs. Joining any of these onto ``base_dir``
    # would produce a path that does not exist.
    _PASSTHROUGH_PREFIXES = ("http://", "https://", "/", "data:", "file:")

    def __init__(self, base_dir: str, *args, **kwargs):
        """
        Store the base directory and initialise the parent processor.

        Args:
            base_dir: The directory relative image paths are resolved against
            *args, **kwargs: Forwarded unchanged to ``ImageInlineProcessor``
        """
        self.base_dir = base_dir
        super().__init__(*args, **kwargs)

    def handleMatch(self, m, data):
        """
        Build the image element and rewrite a relative ``src`` to an absolute path.

        Args:
            m: The regex match object for the image pattern
            data: The source text being scanned

        Returns:
            The ``(element, start_index, end_index)`` tuple produced by the parent
            processor. The element may be ``None``, in which case the tuple is
            returned untouched.
        """
        el, start, end = super().handleMatch(m, data)
        if el is None:
            return el, start, end

        src = el.get("src")
        # Scheme comparison is case-insensitive so that e.g. "HTTP://..." is
        # recognised as a URL instead of being turned into a file path.
        if src and not src.lower().startswith(self._PASSTHROUGH_PREFIXES):
            el.set("src", os.path.abspath(os.path.join(self.base_dir, src)))
        return el, start, end


 class ImagePathExtension(Extension):
    """
    Markdown extension that installs ``ImagePathProcessor`` into a parser.

    The extension only carries the base directory; the actual path rewriting
    happens in the processor it registers.
    """

    def __init__(self, base_dir: str, **kwargs):
        """
        Remember the base directory and initialise the ``Extension`` base class.

        Args:
            base_dir: The directory relative image paths are resolved against
            **kwargs: Forwarded unchanged to ``Extension.__init__``
        """
        self.base_dir = base_dir
        super().__init__(**kwargs)

    def extendMarkdown(self, md):
        """
        Register the custom image processor on the given parser.

        The processor is registered under the name ``"image_link"`` with
        priority 150 and reuses the stock ``IMAGE_LINK_RE`` pattern, which is
        intended to take the place of the built-in image pattern.

        Args:
            md: The markdown parser instance being extended
        """
        md.inlinePatterns.register(
            ImagePathProcessor(self.base_dir, IMAGE_LINK_RE, md),
            "image_link",
            150,
        )


 def get_default_css(
    font_family: str = DEFAULT_FONT_FAMILY,
    font_size: int = DEFAULT_FONT_SIZE,
    width: int = DEFAULT_WIDTH,
    background_color: str = DEFAULT_BG_COLOR,
    text_color: str = DEFAULT_TEXT_COLOR,
 ) -> str:
    """
    Build the stylesheet text embedded in the generated HTML document.

    Args:
        font_family: CSS font family; ``sans-serif`` is appended as a fallback
        font_size: Body font size, written with a ``px`` suffix
        width: Body width, written with a ``px`` suffix
        background_color: CSS background color of the body
        text_color: CSS text color of the body

    Returns:
        CSS rules for body, img, h1-h3, p, code and pre as a single string
    """
    # Doubled braces are literal CSS braces; single-brace fields are filled
    # in by str.format() below.
    template = """
        body {{
            font-family: {font_family}, sans-serif;
            font-size: {font_size}px;
            color: {text_color};
            background-color: {background_color};
            width: {width}px;
            margin: 20px;
            line-height: 1.5;
        }}
        img {{
            max-width: 100%;
            height: auto;
        }}
        h1 {{ font-size: 2em; margin-top: 0.67em; margin-bottom: 0.67em; }}
        h2 {{ font-size: 1.5em; margin-top: 0.83em; margin-bottom: 0.83em; }}
        h3 {{ font-size: 1.17em; margin-top: 1em; margin-bottom: 1em; }}
        p {{ margin-top: 1em; margin-bottom: 1em; }}
        code {{ 
            background-color: #f4f4f4;
            padding: 2px 4px;
            border-radius: 4px;
        }}
        pre {{
            background-color: #f4f4f4;
            padding: 1em;
            border-radius: 4px;
            overflow-x: auto;
        }}
    """
    return template.format(
        font_family=font_family,
        font_size=font_size,
        text_color=text_color,
        background_color=background_color,
        width=width,
    )


 def create_html(
    markdown_text: str,
    base_dir: str = ".",
    font_family: str = DEFAULT_FONT_FAMILY,
    font_size: int = DEFAULT_FONT_SIZE,
    width: int = DEFAULT_WIDTH,
    background_color: str = DEFAULT_BG_COLOR,
    text_color: str = DEFAULT_TEXT_COLOR,
 ) -> str:
    """
    Render markdown into a complete, styled HTML document.

    Relative image paths in the markdown are resolved against ``base_dir``
    by ``ImagePathExtension``.

    Args:
        markdown_text: Markdown source to render
        base_dir: Directory relative image paths are resolved against
        font_family: CSS font family to use
        font_size: Base font size in pixels
        width: Width of the body in pixels
        background_color: Background color of the page
        text_color: Color of the text

    Returns:
        The full HTML document (doctype, head with inline CSS, body) as a string
    """
    # Stylesheet for the <style> element in the document head.
    stylesheet = get_default_css(
        font_family=font_family,
        font_size=font_size,
        width=width,
        background_color=background_color,
        text_color=text_color,
    )

    # Render the markdown body; the last extension rewrites image paths.
    enabled_extensions = [
        "extra",
        "codehilite",
        "fenced_code",
        ImagePathExtension(base_dir),
    ]
    rendered_body = markdown.markdown(markdown_text, extensions=enabled_extensions)

    # Wrap stylesheet and body in a minimal UTF-8 HTML page.
    return f"""
    <!DOCTYPE html>
    <html>
    <head>
        <meta charset="UTF-8">
        <style>
        {stylesheet}
        </style>
    </head>
    <body>
        {rendered_body}
    </body>
    </html>
    """


 def convert_markdown_to_png(
    markdown_text: str,
    output_path: str,
    base_dir: str = ".",
    font_family: str = DEFAULT_FONT_FAMILY,
    font_size: int = DEFAULT_FONT_SIZE,
    width: int = DEFAULT_WIDTH,
    background_color: str = DEFAULT_BG_COLOR,
    text_color: str = DEFAULT_TEXT_COLOR,
    imgkit_options: Optional[Dict] = None,
 ) -> None:
    """
    Render markdown to HTML and write it to ``output_path`` as a PNG image.

    Args:
        markdown_text: Markdown source to render
        output_path: Destination path of the PNG file
        base_dir: Directory relative image paths are resolved against
        font_family: CSS font family to use
        font_size: Base font size in pixels
        width: Width of the body in pixels
        background_color: Background color of the page
        text_color: Color of the text
        imgkit_options: Extra wkhtmltoimage options; these override the
            built-in defaults on key collisions
    """
    # Build the styled HTML page, with image paths already resolved.
    html_document = create_html(
        markdown_text,
        base_dir=base_dir,
        font_family=font_family,
        font_size=font_size,
        width=width,
        background_color=background_color,
        text_color=text_color,
    )

    # Built-in options first, then caller-supplied overrides on top.
    render_options = {
        "format": "png",
        "encoding": "UTF-8",
        "quiet": "",
        "enable-local-file-access": "",  # needed so local image files can be loaded
    }
    render_options.update(imgkit_options or {})

    # Hand the document to imgkit (wkhtmltoimage) to produce the image file.
    imgkit.from_string(html_document, output_path, options=render_options)


 if __name__ == "__main__":
    # Demo: render a small sample document to "output.png".
    markdown_text = """
 # Sample Markdown Document

 This is a paragraph with **bold** and *italic* text.

 ## Code Example

 ```python
 def hello_world():
    print("Hello, World!")
 ```

 ![Sample Image](water.jpeg)

 ### Lists

 * Item 1
 * Item 2
    * Nested item

 1. Numbered item
 2. Another numbered item
    """.strip()

    # Images are resolved relative to the directory containing this script.
    script_dir = os.path.dirname(__file__)

    # Custom styling plus one extra wkhtmltoimage option.
    render_settings = {
        "base_dir": script_dir,
        "font_family": "Helvetica",
        "font_size": 16,
        "width": 1000,
        "imgkit_options": {"quality": 100},
    }
    convert_markdown_to_png(markdown_text, "output.png", **render_settings)
	import markdown
	import imgkit
	from typing import Optional, Dict
	import os
	from markdown.inlinepatterns import ImageInlineProcessor, IMAGE_LINK_RE
	from markdown.extensions import Extension

	# Default styling configuration.
	# These module-level values are the defaults for the styling parameters of
	# get_default_css(), create_html() and convert_markdown_to_png().
	DEFAULT_FONT_FAMILY = "Arial"  # preferred CSS font family ("sans-serif" is appended as fallback)
	DEFAULT_FONT_SIZE = 14  # base font size; emitted into the CSS with a "px" suffix
	DEFAULT_WIDTH = 800  # width of the <body> element; emitted with a "px" suffix
	DEFAULT_BG_COLOR = "white"  # CSS background-color of the page
	DEFAULT_TEXT_COLOR = "black"  # CSS color of the body text


	class ImagePathProcessor(ImageInlineProcessor):
	    """
	    Inline image processor that makes relative image sources absolute.

	    All markdown image syntax is handled by the parent ``ImageInlineProcessor``;
	    this subclass only post-processes the resulting ``<img>`` element and
	    resolves a relative ``src`` against ``base_dir``.
	    """

	    # Lower-case prefixes of sources that must NOT be treated as relative
	    # filesystem paths: remote URLs, root-relative/absolute paths, inline
	    # ``data:`` URIs and ``file:`` URLs.
	    _PASSTHROUGH_PREFIXES = ("http://", "https://", "/", "data:", "file:")

	    def __init__(self, base_dir: str, *args, **kwargs):
	        """
	        Store the base directory and initialise the parent processor.

	        Args:
	            base_dir: The directory relative image paths are resolved against
	            *args, **kwargs: Forwarded unchanged to ``ImageInlineProcessor``
	        """
	        self.base_dir = base_dir
	        # Forward both positional and keyword arguments to the parent.
	        super().__init__(*args, **kwargs)

	    def handleMatch(self, m, data):
	        """
	        Build the image element and rewrite a relative ``src`` to an absolute path.

	        Args:
	            m: The regex match object for the image pattern
	            data: The source text being scanned

	        Returns:
	            The ``(element, start_index, end_index)`` tuple produced by the parent
	            processor. The element may be ``None``, in which case the tuple is
	            returned untouched.
	        """
	        el, start, end = super().handleMatch(m, data)
	        if el is None:
	            return el, start, end

	        src = el.get("src")
	        # Scheme comparison is case-insensitive so that e.g. "HTTP://..." is
	        # recognised as a URL instead of being turned into a file path.
	        if src and not src.lower().startswith(self._PASSTHROUGH_PREFIXES):
	            el.set("src", os.path.abspath(os.path.join(self.base_dir, src)))
	        return el, start, end


	class ImagePathExtension(Extension):
	    """
	    Markdown extension that installs ``ImagePathProcessor`` into a parser.

	    The extension only carries the base directory; the actual path rewriting
	    happens in the processor it registers.
	    """

	    def __init__(self, base_dir: str, **kwargs):
	        """
	        Remember the base directory and initialise the ``Extension`` base class.

	        Args:
	            base_dir: The directory relative image paths are resolved against
	            **kwargs: Forwarded unchanged to ``Extension.__init__``
	        """
	        self.base_dir = base_dir
	        super().__init__(**kwargs)

	    def extendMarkdown(self, md):
	        """
	        Register the custom image processor on the given parser.

	        The processor is registered under the name ``"image_link"`` with
	        priority 150 and reuses the stock ``IMAGE_LINK_RE`` pattern, which is
	        intended to take the place of the built-in image pattern.

	        Args:
	            md: The markdown parser instance being extended
	        """
	        md.inlinePatterns.register(
	            ImagePathProcessor(self.base_dir, IMAGE_LINK_RE, md),
	            "image_link",
	            150,
	        )


	def get_default_css(
	    font_family: str = DEFAULT_FONT_FAMILY,
	    font_size: int = DEFAULT_FONT_SIZE,
	    width: int = DEFAULT_WIDTH,
	    background_color: str = DEFAULT_BG_COLOR,
	    text_color: str = DEFAULT_TEXT_COLOR,
	) -> str:
	    """
	    Build the stylesheet text embedded in the generated HTML document.

	    Args:
	        font_family: CSS font family; ``sans-serif`` is appended as a fallback
	        font_size: Body font size, written with a ``px`` suffix
	        width: Body width, written with a ``px`` suffix
	        background_color: CSS background color of the body
	        text_color: CSS text color of the body

	    Returns:
	        CSS rules for body, img, h1-h3, p, code and pre as a single string
	    """
	    # Doubled braces are literal CSS braces; single-brace fields are filled
	    # in by str.format() below.
	    template = """
	body {{
	font-family: {font_family}, sans-serif;
	font-size: {font_size}px;
	color: {text_color};
	background-color: {background_color};
	width: {width}px;
	margin: 20px;
	line-height: 1.5;
	}}
	img {{
	max-width: 100%;
	height: auto;
	}}
	h1 {{ font-size: 2em; margin-top: 0.67em; margin-bottom: 0.67em; }}
	h2 {{ font-size: 1.5em; margin-top: 0.83em; margin-bottom: 0.83em; }}
	h3 {{ font-size: 1.17em; margin-top: 1em; margin-bottom: 1em; }}
	p {{ margin-top: 1em; margin-bottom: 1em; }}
	code {{
	background-color: #f4f4f4;
	padding: 2px 4px;
	border-radius: 4px;
	}}
	pre {{
	background-color: #f4f4f4;
	padding: 1em;
	border-radius: 4px;
	overflow-x: auto;
	}}
	"""
	    return template.format(
	        font_family=font_family,
	        font_size=font_size,
	        text_color=text_color,
	        background_color=background_color,
	        width=width,
	    )


	def create_html(
	    markdown_text: str,
	    base_dir: str = ".",
	    font_family: str = DEFAULT_FONT_FAMILY,
	    font_size: int = DEFAULT_FONT_SIZE,
	    width: int = DEFAULT_WIDTH,
	    background_color: str = DEFAULT_BG_COLOR,
	    text_color: str = DEFAULT_TEXT_COLOR,
	) -> str:
	    """
	    Render markdown into a complete, styled HTML document.

	    Relative image paths in the markdown are resolved against ``base_dir``
	    by ``ImagePathExtension``.

	    Args:
	        markdown_text: Markdown source to render
	        base_dir: Directory relative image paths are resolved against
	        font_family: CSS font family to use
	        font_size: Base font size in pixels
	        width: Width of the body in pixels
	        background_color: Background color of the page
	        text_color: Color of the text

	    Returns:
	        The full HTML document (doctype, head with inline CSS, body) as a string
	    """
	    # Stylesheet for the <style> element in the document head.
	    stylesheet = get_default_css(
	        font_family=font_family,
	        font_size=font_size,
	        width=width,
	        background_color=background_color,
	        text_color=text_color,
	    )

	    # Render the markdown body; the last extension rewrites image paths.
	    enabled_extensions = [
	        "extra",
	        "codehilite",
	        "fenced_code",
	        ImagePathExtension(base_dir),
	    ]
	    rendered_body = markdown.markdown(markdown_text, extensions=enabled_extensions)

	    # Wrap stylesheet and body in a minimal UTF-8 HTML page.
	    return f"""
	<!DOCTYPE html>
	<html>
	<head>
	<meta charset="UTF-8">
	<style>
	{stylesheet}
	</style>
	</head>
	<body>
	{rendered_body}
	</body>
	</html>
	"""


	def convert_markdown_to_png(
	    markdown_text: str,
	    output_path: str,
	    base_dir: str = ".",
	    font_family: str = DEFAULT_FONT_FAMILY,
	    font_size: int = DEFAULT_FONT_SIZE,
	    width: int = DEFAULT_WIDTH,
	    background_color: str = DEFAULT_BG_COLOR,
	    text_color: str = DEFAULT_TEXT_COLOR,
	    imgkit_options: Optional[Dict] = None,
	) -> None:
	    """
	    Render markdown to HTML and write it to ``output_path`` as a PNG image.

	    Args:
	        markdown_text: Markdown source to render
	        output_path: Destination path of the PNG file
	        base_dir: Directory relative image paths are resolved against
	        font_family: CSS font family to use
	        font_size: Base font size in pixels
	        width: Width of the body in pixels
	        background_color: Background color of the page
	        text_color: Color of the text
	        imgkit_options: Extra wkhtmltoimage options; these override the
	            built-in defaults on key collisions
	    """
	    # Build the styled HTML page, with image paths already resolved.
	    html_document = create_html(
	        markdown_text,
	        base_dir=base_dir,
	        font_family=font_family,
	        font_size=font_size,
	        width=width,
	        background_color=background_color,
	        text_color=text_color,
	    )

	    # Built-in options first, then caller-supplied overrides on top.
	    render_options = {
	        "format": "png",
	        "encoding": "UTF-8",
	        "quiet": "",
	        "enable-local-file-access": "",  # needed so local image files can be loaded
	    }
	    render_options.update(imgkit_options or {})

	    # Hand the document to imgkit (wkhtmltoimage) to produce the image file.
	    imgkit.from_string(html_document, output_path, options=render_options)


	if __name__ == "__main__":
	    # Demo: render a small sample document to "output.png".
	    markdown_text = """
	# Sample Markdown Document

	This is a paragraph with bold and italic text.

	## Code Example

	```python
	def hello_world():
	print("Hello, World!")
	```

	![Sample Image](water.jpeg)

	### Lists

	* Item 1
	* Item 2
	* Nested item

	1. Numbered item
	2. Another numbered item
	""".strip()

	    # Images are resolved relative to the directory containing this script.
	    script_dir = os.path.dirname(__file__)

	    # Custom styling plus one extra wkhtmltoimage option.
	    render_settings = {
	        "base_dir": script_dir,
	        "font_family": "Helvetica",
	        "font_size": 16,
	        "width": 1000,
	        "imgkit_options": {"quality": 100},
	    }
	    convert_markdown_to_png(markdown_text, "output.png", **render_settings)