BexTuychiev · February 18, 2026 12:33
diff --git a/notebook_to_docx.py b/notebook_to_docx.py
 #!/usr/bin/env python3
 """
 Notebook to DOCX Converter

 Converts Jupyter notebooks to Word documents with proper formatting:
 - Markdown formatting preserved as Word styles
 - Backticks preserved around inline code
 - Code blocks with triple backticks visible, Courier New font
 - Non-code text in Poppins font
 - Images with alt text built-in
 - Clickable hyperlinks
 """

 import sys
 import re
 import os
 from pathlib import Path

 import nbformat
 from docx import Document
 from docx.shared import Pt, Inches, Twips
 from docx.enum.style import WD_STYLE_TYPE
 from docx.oxml.ns import qn
 from docx.oxml import OxmlElement
 from PIL import Image


 def add_hyperlink(paragraph, text, url, bold=False, italic=False):
    """Append a clickable external hyperlink to the end of a paragraph.

    Registers ``url`` as an external hyperlink relationship on the
    paragraph's part, builds a ``w:hyperlink`` element containing a single
    blue, underlined Poppins run and appends it to the paragraph XML.

    Args:
        paragraph: python-docx paragraph that receives the link.
        text: Visible link text.
        url: Link target.
        bold: Render the link text in bold.
        italic: Render the link text in italics.

    Returns:
        The newly created ``w:hyperlink`` element.
    """
    r_id = paragraph.part.relate_to(
        url,
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink",
        is_external=True,
    )

    hyperlink = OxmlElement('w:hyperlink')
    hyperlink.set(qn('r:id'), r_id)

    run_props = OxmlElement('w:rPr')

    # Blue color for links
    color = OxmlElement('w:color')
    color.set(qn('w:val'), '0563C1')
    run_props.append(color)

    # Underline
    underline = OxmlElement('w:u')
    underline.set(qn('w:val'), 'single')
    run_props.append(underline)

    # Font
    fonts = OxmlElement('w:rFonts')
    fonts.set(qn('w:ascii'), 'Poppins')
    fonts.set(qn('w:hAnsi'), 'Poppins')
    run_props.append(fonts)

    # Optional emphasis
    if bold:
        run_props.append(OxmlElement('w:b'))
    if italic:
        run_props.append(OxmlElement('w:i'))

    link_run = OxmlElement('w:r')
    link_run.append(run_props)

    text_elem = OxmlElement('w:t')
    # Without xml:space="preserve" leading/trailing spaces in the link text
    # are not kept when the document is rendered.
    text_elem.set(qn('xml:space'), 'preserve')
    text_elem.text = text
    link_run.append(text_elem)

    hyperlink.append(link_run)
    paragraph._p.append(hyperlink)

    return hyperlink


 def set_image_alt_text(inline, alt_text):
    """Store alt text on the first ``docPr`` element found under ``inline``.

    Walks ``inline.iter()`` and, on the first element whose tag contains
    ``docPr``, sets both the ``descr`` and ``title`` attributes to
    ``alt_text``. Nothing is changed when no such element exists.
    """
    doc_props = next(
        (node for node in inline.iter() if 'docPr' in node.tag),
        None,
    )
    if doc_props is not None:
        doc_props.set('descr', alt_text)
        doc_props.set('title', alt_text)


 def create_styles(doc):
    """Register the custom ``CodeBlock`` paragraph style on ``doc``.

    The style uses 10 pt Courier New, no space before or after the paragraph
    and a line spacing of 1.0. It is only added when the document does not
    already contain a style with that name.
    """
    styles = doc.styles
    existing_names = [style.name for style in styles]
    if 'CodeBlock' in existing_names:
        return

    code_style = styles.add_style('CodeBlock', WD_STYLE_TYPE.PARAGRAPH)

    font = code_style.font
    font.name = 'Courier New'
    font.size = Pt(10)

    # Tight spacing so consecutive code paragraphs read as one block
    spacing = code_style.paragraph_format
    spacing.space_before = Pt(0)
    spacing.space_after = Pt(0)
    spacing.line_spacing = 1.0


 def process_inline_formatting(paragraph, text, base_font='Poppins', inherited_bold=False, inherited_italic=False):
    """Render inline markdown from ``text`` into ``paragraph``.

    Recognised constructs are ``**bold**``, ``*italic*``, backtick-quoted
    inline code (the backticks are kept in the output), ``[text](url)``
    links, and links wrapped in bold or italic markers. Bold and italic
    content is processed recursively, so nested input such as
    ``**see [docs](url) now**`` keeps the link and the surrounding text.

    Args:
        paragraph: python-docx paragraph that receives the runs and links.
        text: Markdown source for one line or table cell.
        base_font: Font name applied to plain-text runs.
        inherited_bold: True when an enclosing construct is bold.
        inherited_italic: True when an enclosing construct is italic.
    """
    if not text:
        return

    # Order matters: when two patterns match at the same position the one
    # listed first wins, because the scan below only switches to a match
    # that starts strictly earlier.
    patterns = [
        # Bold links: **[text](url)**
        (r'\*\*\[([^\]]+)\]\(([^)]+)\)\*\*', 'bold_link'),
        # Italic links: *[text](url)*
        (r'\*\[([^\]]+)\]\(([^)]+)\)\*', 'italic_link'),
        # Bold text (may contain nested formatting)
        (r'\*\*(.+?)\*\*', 'bold'),
        # Italic text
        (r'\*([^*]+?)\*', 'italic'),
        # Inline code with backticks - preserve backticks
        (r'`([^`]+)`', 'code'),
        # Regular links
        (r'\[([^\]]+)\]\(([^)]+)\)', 'link'),
    ]

    def add_plain_run(run_text):
        # Plain text carries the base font plus any inherited emphasis.
        run = paragraph.add_run(run_text)
        run.font.name = base_font
        run.font.size = Pt(11)
        if inherited_bold:
            run.bold = True
        if inherited_italic:
            run.italic = True

    def add_link(link_text, link_url, bold, italic):
        hyperlink = add_hyperlink(paragraph, link_text, link_url, bold=bold)
        if italic:
            # Italics are applied on the returned element, so only the
            # (paragraph, text, url, bold) call signature is relied upon.
            for run_props in hyperlink.iter(qn('w:rPr')):
                run_props.append(OxmlElement('w:i'))

    remaining = text

    while remaining:
        earliest_match = None
        earliest_pos = len(remaining)
        match_type = None

        for pattern, ptype in patterns:
            match = re.search(pattern, remaining)
            if match and match.start() < earliest_pos:
                earliest_match = match
                earliest_pos = match.start()
                match_type = ptype

        if earliest_match is None:
            # No more markup: the rest is plain text
            add_plain_run(remaining)
            break

        # Text before the match
        if earliest_pos > 0:
            add_plain_run(remaining[:earliest_pos])

        if match_type == 'bold_link':
            add_link(earliest_match.group(1), earliest_match.group(2),
                     bold=True, italic=inherited_italic)

        elif match_type == 'italic_link':
            add_link(earliest_match.group(1), earliest_match.group(2),
                     bold=inherited_bold, italic=True)

        elif match_type == 'bold':
            # Recurse over the whole inner text; a link inside it is handled
            # by the 'link' branch with bold inherited, and any text around
            # that link is kept.
            process_inline_formatting(paragraph, earliest_match.group(1), base_font,
                                      inherited_bold=True, inherited_italic=inherited_italic)

        elif match_type == 'italic':
            process_inline_formatting(paragraph, earliest_match.group(1), base_font,
                                      inherited_bold=inherited_bold, inherited_italic=True)

        elif match_type == 'code':
            # Preserve backticks around inline code
            code_text = earliest_match.group(1)
            run = paragraph.add_run(f'`{code_text}`')
            run.font.name = 'Courier New'
            run.font.size = Pt(10)

        elif match_type == 'link':
            add_link(earliest_match.group(1), earliest_match.group(2),
                     bold=inherited_bold, italic=inherited_italic)

        remaining = remaining[earliest_match.end():]


 def add_image_with_alt(doc, image_path, alt_text, base_path=None):
    """Insert an image as its own paragraph and attach alt text to it.

    A relative ``image_path`` is joined onto ``base_path``. If that file does
    not exist, ``base_path`` is walked for a file with the same base name.
    When nothing is found an italic ``[Image not found: ...]`` placeholder
    paragraph is written instead.

    The picture width is one inch per 100 pixels, capped at 6 inches; 5
    inches is used when the image size cannot be read. If inserting the
    picture fails, an italic ``[Error loading image: ...]`` run is added.
    """
    # Resolve image path
    use_base = base_path and not os.path.isabs(image_path)
    resolved = os.path.join(base_path, image_path) if use_base else image_path

    # Fall back to searching by file name below base_path
    if base_path and not os.path.exists(resolved):
        wanted = os.path.basename(image_path)
        for folder, _subdirs, filenames in os.walk(base_path):
            if wanted in filenames:
                resolved = os.path.join(folder, wanted)
                break

    if not os.path.exists(resolved):
        placeholder = doc.add_paragraph().add_run(f'[Image not found: {image_path}]')
        placeholder.font.name = 'Poppins'
        placeholder.italic = True
        return

    # Scale from pixel width, never wider than 6 inches
    try:
        with Image.open(resolved) as img:
            pixel_width, _pixel_height = img.size
            picture_width = min(Inches(pixel_width / 100), Inches(6))
    except Exception:
        picture_width = Inches(5)

    # Add image
    paragraph = doc.add_paragraph()
    picture_run = paragraph.add_run()

    try:
        picture = picture_run.add_picture(resolved, width=picture_width)
        # Alt text is written directly into the picture's inline XML
        set_image_alt_text(picture._inline, alt_text or 'Image')
    except Exception as e:
        error_run = paragraph.add_run(f'[Error loading image: {e}]')
        error_run.font.name = 'Poppins'
        error_run.italic = True


 def add_code_block(doc, code_lines):
    """Write every entry of ``code_lines`` as its own tightly spaced paragraph.

    Each paragraph gets zero space before/after, a line spacing of 1.0 and a
    single 10 pt Courier New run.
    """
    for text in code_lines:
        paragraph = doc.add_paragraph()

        # Set tight line spacing
        spacing = paragraph.paragraph_format
        spacing.space_before = Pt(0)
        spacing.space_after = Pt(0)
        spacing.line_spacing = 1.0

        # Empty lines need a space
        code_run = paragraph.add_run(text or ' ')
        font = code_run.font
        font.name = 'Courier New'
        font.size = Pt(10)


 def parse_markdown_table(lines, start_index):
    """Parse a markdown pipe table starting at ``start_index``.

    Consecutive lines are consumed for as long as they start or end with
    ``|``. Delimiter rows such as ``|---|:-:|`` are skipped; every other row
    is split on ``|`` into whitespace-stripped cell strings.

    Args:
        lines: All lines of the markdown text being processed.
        start_index: Index of the first table line.

    Returns:
        ``(table_data, end_index)`` where ``table_data`` is a list of rows
        (each a list of cell strings) and ``end_index`` is the index of the
        last consumed line (``start_index - 1`` when nothing was consumed).
    """
    # A delimiter row needs at least one dash in every cell. Requiring the
    # dash keeps rows made of empty cells ("|   |   |") as data rows.
    separator = re.compile(r'^\|?\s*:?-+:?\s*(?:\|\s*:?-+:?\s*)*\|?$')

    table_rows = []
    i = start_index

    while i < len(lines):
        line = lines[i].strip()

        # Stop at the first line that is not part of the table
        if not line.startswith('|') and not line.endswith('|'):
            break

        if not separator.match(line):
            # Remove exactly one border pipe per side so that empty first or
            # last cells are not swallowed.
            if line.startswith('|'):
                line = line[1:]
            if line.endswith('|'):
                line = line[:-1]
            table_rows.append([cell.strip() for cell in line.split('|')])

        i += 1

    return table_rows, i - 1


 def add_table_to_doc(doc, table_data):
    """Create a 'Table Grid' table in ``doc`` from parsed markdown rows.

    The first row of ``table_data`` fixes the column count; cells beyond that
    count in later rows are ignored. Cell text goes through
    ``process_inline_formatting`` and every run in the first row is made
    bold. An empty paragraph is appended after the table. Nothing is written
    when ``table_data`` or its first row is empty.
    """
    if not table_data or not table_data[0]:
        return

    table = doc.add_table(rows=len(table_data), cols=len(table_data[0]))
    table.style = 'Table Grid'

    for row_number, (table_row, cell_texts) in enumerate(zip(table.rows, table_data)):
        is_header = row_number == 0
        # zip stops at the shorter side, so surplus cell texts are dropped
        for cell, cell_text in zip(table_row.cells, cell_texts):
            # Clear existing content and add formatted text
            cell.text = ''
            paragraph = cell.paragraphs[0]
            process_inline_formatting(paragraph, cell_text)

            if is_header:
                for run in paragraph.runs:
                    run.bold = True

    # Add some space after the table
    doc.add_paragraph()


 def process_markdown_cell(doc, content, base_path=None):
    """Render one markdown cell into ``doc``, one source line at a time.

    Handled line types, checked in this order: headings (leading ``#``),
    fenced code blocks, blockquotes, bullet lists, numbered lists,
    standalone images, horizontal rules, blank lines (skipped), pipe tables
    and finally ordinary paragraphs.

    Args:
        doc: python-docx document being built.
        content: Markdown source of the cell.
        base_path: Directory used to resolve relative image paths.
    """
    image_pattern = r'^!\[([^\]]*)\]\(([^)]+)\)$'
    ordered_pattern = r'^\d+\.\s'

    lines = content.split('\n')
    total = len(lines)
    pos = 0

    while pos < total:
        line = lines[pos]
        stripped = line.strip()
        image_match = re.match(image_pattern, stripped)
        # Branches that consume several lines move this further ahead
        next_pos = pos + 1

        if line.startswith('#'):
            # Heading level = number of leading '#' characters, capped at 6
            level = min(len(line) - len(line.lstrip('#')), 6)
            heading = doc.add_heading(line[level:].strip(), level=level)
            for run in heading.runs:
                run.font.name = 'Poppins'

        elif line.startswith('```'):
            # Keep the fences (and language tag) visible in the output
            code_lines = ['```' + line[3:].strip()]
            while next_pos < total and not lines[next_pos].startswith('```'):
                code_lines.append(lines[next_pos])
                next_pos += 1
            code_lines.append('```')
            add_code_block(doc, code_lines)
            next_pos += 1  # step over the closing fence

        elif line.startswith('>'):
            quote = doc.add_paragraph()
            quote.paragraph_format.left_indent = Inches(0.5)
            process_inline_formatting(quote, line[1:].strip())

        elif stripped.startswith(('- ', '* ')):
            bullet = doc.add_paragraph(style='List Bullet')
            process_inline_formatting(bullet, stripped[2:])

        elif re.match(ordered_pattern, stripped):
            numbered = doc.add_paragraph(style='List Number')
            process_inline_formatting(numbered, re.sub(ordered_pattern, '', stripped))

        elif image_match:
            add_image_with_alt(doc, image_match.group(2), image_match.group(1), base_path)

        elif stripped in ('---', '***', '___'):
            # Horizontal rule drawn as a row of box-drawing characters
            doc.add_paragraph().add_run('─' * 50)

        elif not stripped:
            pass  # Skip empty lines

        elif stripped.startswith('|'):
            table_data, last_row = parse_markdown_table(lines, pos)
            if table_data:
                add_table_to_doc(doc, table_data)
            next_pos = last_row + 1

        else:
            process_inline_formatting(doc.add_paragraph(), line)

        pos = next_pos


 def process_code_cell(doc, source, outputs=None):
    """Write a code cell as a visible ```` ```python ```` fenced block.

    ``outputs`` is accepted but not used.
    """
    fenced = ['```python']
    fenced.extend(source.split('\n'))
    fenced.append('```')
    add_code_block(doc, fenced)


 def convert_notebook_to_docx(notebook_path, output_path=None):
    """Convert a Jupyter notebook file into a Word document.

    Args:
        notebook_path: Path of the notebook to read (UTF-8, nbformat v4).
        output_path: Destination file; defaults to the notebook path with a
            ``.docx`` suffix.

    Returns:
        The output path as a ``Path``.
    """
    notebook_path = Path(notebook_path)
    output_path = (
        notebook_path.with_suffix('.docx') if output_path is None else Path(output_path)
    )

    # Read notebook
    with open(notebook_path, 'r', encoding='utf-8') as f:
        nb = nbformat.read(f, as_version=4)

    # Create document
    doc = Document()
    create_styles(doc)

    # Default body font comes from the Normal style
    normal_font = doc.styles['Normal'].font
    normal_font.name = 'Poppins'
    normal_font.size = Pt(11)

    # Relative image paths are resolved against the notebook's directory
    base_path = notebook_path.parent

    for cell in nb.cells:
        kind = cell.cell_type
        if kind == 'markdown':
            process_markdown_cell(doc, cell.source, base_path)
        elif kind == 'code':
            process_code_cell(doc, cell.source, cell.get('outputs', []))

    # Save document
    doc.save(output_path)
    print(f'Converted: {notebook_path} -> {output_path}')
    return output_path


 def main():
    """Command-line entry point: ``<notebook_path> [output_path]``.

    Prints a usage line and exits with status 1 when no notebook path is
    given.
    """
    args = sys.argv[1:]
    if not args:
        print('Usage: python notebook_to_docx.py <notebook_path> [output_path]')
        sys.exit(1)

    notebook_path = args[0]
    output_path = args[1] if len(args) > 1 else None

    convert_notebook_to_docx(notebook_path, output_path)


 # Run the command-line entry point only when executed as a script.
 if __name__ == '__main__':
    main()
	#!/usr/bin/env python3
	"""
	Notebook to DOCX Converter

	Converts Jupyter notebooks to Word documents with proper formatting:
	- Markdown formatting preserved as Word styles
	- Backticks preserved around inline code
	- Code blocks with triple backticks visible, Courier New font
	- Non-code text in Poppins font
	- Images with alt text built-in
	- Clickable hyperlinks
	"""

	import sys
	import re
	import os
	from pathlib import Path

	import nbformat
	from docx import Document
	from docx.shared import Pt, Inches, Twips
	from docx.enum.style import WD_STYLE_TYPE
	from docx.oxml.ns import qn
	from docx.oxml import OxmlElement
	from PIL import Image


	def add_hyperlink(paragraph, text, url, bold=False, italic=False):
	    """Append a clickable external hyperlink to the end of a paragraph.

	    Registers ``url`` as an external hyperlink relationship on the
	    paragraph's part, builds a ``w:hyperlink`` element containing a single
	    blue, underlined Poppins run and appends it to the paragraph XML.

	    Args:
	        paragraph: python-docx paragraph that receives the link.
	        text: Visible link text.
	        url: Link target.
	        bold: Render the link text in bold.
	        italic: Render the link text in italics.

	    Returns:
	        The newly created ``w:hyperlink`` element.
	    """
	    r_id = paragraph.part.relate_to(
	        url,
	        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink",
	        is_external=True,
	    )

	    hyperlink = OxmlElement('w:hyperlink')
	    hyperlink.set(qn('r:id'), r_id)

	    run_props = OxmlElement('w:rPr')

	    # Blue color for links
	    color = OxmlElement('w:color')
	    color.set(qn('w:val'), '0563C1')
	    run_props.append(color)

	    # Underline
	    underline = OxmlElement('w:u')
	    underline.set(qn('w:val'), 'single')
	    run_props.append(underline)

	    # Font
	    fonts = OxmlElement('w:rFonts')
	    fonts.set(qn('w:ascii'), 'Poppins')
	    fonts.set(qn('w:hAnsi'), 'Poppins')
	    run_props.append(fonts)

	    # Optional emphasis
	    if bold:
	        run_props.append(OxmlElement('w:b'))
	    if italic:
	        run_props.append(OxmlElement('w:i'))

	    link_run = OxmlElement('w:r')
	    link_run.append(run_props)

	    text_elem = OxmlElement('w:t')
	    # Without xml:space="preserve" leading/trailing spaces in the link text
	    # are not kept when the document is rendered.
	    text_elem.set(qn('xml:space'), 'preserve')
	    text_elem.text = text
	    link_run.append(text_elem)

	    hyperlink.append(link_run)
	    paragraph._p.append(hyperlink)

	    return hyperlink


	def set_image_alt_text(inline, alt_text):
	    """Store alt text on the first ``docPr`` element found under ``inline``.

	    Walks ``inline.iter()`` and, on the first element whose tag contains
	    ``docPr``, sets both the ``descr`` and ``title`` attributes to
	    ``alt_text``. Nothing is changed when no such element exists.
	    """
	    for child in inline.iter():
	        if 'docPr' in child.tag:
	            child.set('descr', alt_text)
	            child.set('title', alt_text)
	            break


	def create_styles(doc):
	    """Register the custom ``CodeBlock`` paragraph style on ``doc``.

	    The style uses 10 pt Courier New, no space before or after the paragraph
	    and a line spacing of 1.0. It is only added when the document does not
	    already contain a style with that name.
	    """
	    styles = doc.styles

	    # Code block style with tight line spacing
	    if 'CodeBlock' not in [s.name for s in styles]:
	        code_style = styles.add_style('CodeBlock', WD_STYLE_TYPE.PARAGRAPH)
	        code_style.font.name = 'Courier New'
	        code_style.font.size = Pt(10)
	        code_style.paragraph_format.space_before = Pt(0)
	        code_style.paragraph_format.space_after = Pt(0)
	        code_style.paragraph_format.line_spacing = 1.0


	def process_inline_formatting(paragraph, text, base_font='Poppins', inherited_bold=False, inherited_italic=False):
	    """Render inline markdown from ``text`` into ``paragraph``.

	    Recognised constructs are ``**bold**``, ``*italic*``, backtick-quoted
	    inline code (the backticks are kept in the output), ``[text](url)``
	    links, and links wrapped in bold or italic markers. Bold and italic
	    content is processed recursively, so nested input such as
	    ``**see [docs](url) now**`` keeps the link and the surrounding text.

	    Args:
	        paragraph: python-docx paragraph that receives the runs and links.
	        text: Markdown source for one line or table cell.
	        base_font: Font name applied to plain-text runs.
	        inherited_bold: True when an enclosing construct is bold.
	        inherited_italic: True when an enclosing construct is italic.
	    """
	    if not text:
	        return

	    # Order matters: when two patterns match at the same position the one
	    # listed first wins, because the scan below only switches to a match
	    # that starts strictly earlier.
	    patterns = [
	        # Bold links: **[text](url)**
	        (r'\*\*\[([^\]]+)\]\(([^)]+)\)\*\*', 'bold_link'),
	        # Italic links: *[text](url)*
	        (r'\*\[([^\]]+)\]\(([^)]+)\)\*', 'italic_link'),
	        # Bold text (may contain nested formatting)
	        (r'\*\*(.+?)\*\*', 'bold'),
	        # Italic text
	        (r'\*([^*]+?)\*', 'italic'),
	        # Inline code with backticks - preserve backticks
	        (r'`([^`]+)`', 'code'),
	        # Regular links
	        (r'\[([^\]]+)\]\(([^)]+)\)', 'link'),
	    ]

	    def add_plain_run(run_text):
	        # Plain text carries the base font plus any inherited emphasis.
	        run = paragraph.add_run(run_text)
	        run.font.name = base_font
	        run.font.size = Pt(11)
	        if inherited_bold:
	            run.bold = True
	        if inherited_italic:
	            run.italic = True

	    def add_link(link_text, link_url, bold, italic):
	        hyperlink = add_hyperlink(paragraph, link_text, link_url, bold=bold)
	        if italic:
	            # Italics are applied on the returned element, so only the
	            # (paragraph, text, url, bold) call signature is relied upon.
	            for run_props in hyperlink.iter(qn('w:rPr')):
	                run_props.append(OxmlElement('w:i'))

	    remaining = text

	    while remaining:
	        earliest_match = None
	        earliest_pos = len(remaining)
	        match_type = None

	        for pattern, ptype in patterns:
	            match = re.search(pattern, remaining)
	            if match and match.start() < earliest_pos:
	                earliest_match = match
	                earliest_pos = match.start()
	                match_type = ptype

	        if earliest_match is None:
	            # No more markup: the rest is plain text
	            add_plain_run(remaining)
	            break

	        # Text before the match
	        if earliest_pos > 0:
	            add_plain_run(remaining[:earliest_pos])

	        if match_type == 'bold_link':
	            add_link(earliest_match.group(1), earliest_match.group(2),
	                     bold=True, italic=inherited_italic)

	        elif match_type == 'italic_link':
	            add_link(earliest_match.group(1), earliest_match.group(2),
	                     bold=inherited_bold, italic=True)

	        elif match_type == 'bold':
	            # Recurse over the whole inner text; a link inside it is handled
	            # by the 'link' branch with bold inherited, and any text around
	            # that link is kept.
	            process_inline_formatting(paragraph, earliest_match.group(1), base_font,
	                                      inherited_bold=True, inherited_italic=inherited_italic)

	        elif match_type == 'italic':
	            process_inline_formatting(paragraph, earliest_match.group(1), base_font,
	                                      inherited_bold=inherited_bold, inherited_italic=True)

	        elif match_type == 'code':
	            # Preserve backticks around inline code
	            code_text = earliest_match.group(1)
	            run = paragraph.add_run(f'`{code_text}`')
	            run.font.name = 'Courier New'
	            run.font.size = Pt(10)

	        elif match_type == 'link':
	            add_link(earliest_match.group(1), earliest_match.group(2),
	                     bold=inherited_bold, italic=inherited_italic)

	        remaining = remaining[earliest_match.end():]


	def add_image_with_alt(doc, image_path, alt_text, base_path=None):
	    """Insert an image as its own paragraph and attach alt text to it.

	    A relative ``image_path`` is joined onto ``base_path``. If that file does
	    not exist, ``base_path`` is walked for a file with the same base name.
	    When nothing is found an italic ``[Image not found: ...]`` placeholder
	    paragraph is written instead.

	    The picture width is one inch per 100 pixels, capped at 6 inches; 5
	    inches is used when the image size cannot be read. If inserting the
	    picture fails, an italic ``[Error loading image: ...]`` run is added.
	    """
	    # Resolve image path
	    if base_path and not os.path.isabs(image_path):
	        full_path = os.path.join(base_path, image_path)
	    else:
	        full_path = image_path

	    if not os.path.exists(full_path):
	        # Try without leading path components
	        if base_path:
	            filename = os.path.basename(image_path)
	            for root, dirs, files in os.walk(base_path):
	                if filename in files:
	                    full_path = os.path.join(root, filename)
	                    break

	    if not os.path.exists(full_path):
	        p = doc.add_paragraph()
	        run = p.add_run(f'[Image not found: {image_path}]')
	        run.font.name = 'Poppins'
	        run.italic = True
	        return

	    # Get image dimensions and scale appropriately
	    try:
	        with Image.open(full_path) as img:
	            width, height = img.size
	            max_width = Inches(6)

	            if width > 600:
	                doc_width = max_width
	            else:
	                doc_width = Inches(width / 100)
	                if doc_width > max_width:
	                    doc_width = max_width
	    except Exception:
	        doc_width = Inches(5)

	    # Add image
	    paragraph = doc.add_paragraph()
	    run = paragraph.add_run()

	    try:
	        inline = run.add_picture(full_path, width=doc_width)
	        # Set alt text via XML manipulation
	        inline_element = inline._inline
	        set_image_alt_text(inline_element, alt_text or 'Image')
	    except Exception as e:
	        run = paragraph.add_run(f'[Error loading image: {e}]')
	        run.font.name = 'Poppins'
	        run.italic = True


	def add_code_block(doc, code_lines):
	    """Write every entry of ``code_lines`` as its own tightly spaced paragraph.

	    Each paragraph gets zero space before/after, a line spacing of 1.0 and a
	    single 10 pt Courier New run.
	    """
	    for code_line in code_lines:
	        p = doc.add_paragraph()
	        # Set tight line spacing
	        p.paragraph_format.space_before = Pt(0)
	        p.paragraph_format.space_after = Pt(0)
	        p.paragraph_format.line_spacing = 1.0

	        run = p.add_run(code_line if code_line else ' ')  # Empty lines need a space
	        run.font.name = 'Courier New'
	        run.font.size = Pt(10)


	def parse_markdown_table(lines, start_index):
	    """Parse a markdown pipe table starting at ``start_index``.

	    Consecutive lines are consumed for as long as they start or end with
	    ``|``. Delimiter rows such as ``|---|:-:|`` are skipped; every other row
	    is split on ``|`` into whitespace-stripped cell strings.

	    Args:
	        lines: All lines of the markdown text being processed.
	        start_index: Index of the first table line.

	    Returns:
	        ``(table_data, end_index)`` where ``table_data`` is a list of rows
	        (each a list of cell strings) and ``end_index`` is the index of the
	        last consumed line (``start_index - 1`` when nothing was consumed).
	    """
	    # A delimiter row needs at least one dash in every cell. Requiring the
	    # dash keeps rows made of empty cells ("|   |   |") as data rows.
	    separator = re.compile(r'^\|?\s*:?-+:?\s*(?:\|\s*:?-+:?\s*)*\|?$')

	    table_rows = []
	    i = start_index

	    while i < len(lines):
	        line = lines[i].strip()

	        # Stop at the first line that is not part of the table
	        if not line.startswith('|') and not line.endswith('|'):
	            break

	        if not separator.match(line):
	            # Remove exactly one border pipe per side so that empty first or
	            # last cells are not swallowed.
	            if line.startswith('|'):
	                line = line[1:]
	            if line.endswith('|'):
	                line = line[:-1]
	            table_rows.append([cell.strip() for cell in line.split('|')])

	        i += 1

	    return table_rows, i - 1


	def add_table_to_doc(doc, table_data):
	    """Create a 'Table Grid' table in ``doc`` from parsed markdown rows.

	    The first row of ``table_data`` fixes the column count; cells beyond that
	    count in later rows are ignored. Cell text goes through
	    ``process_inline_formatting`` and every run in the first row is made
	    bold. An empty paragraph is appended after the table. Nothing is written
	    when ``table_data`` or its first row is empty.
	    """
	    if not table_data or not table_data[0]:
	        return

	    num_cols = len(table_data[0])
	    num_rows = len(table_data)

	    # Create table
	    table = doc.add_table(rows=num_rows, cols=num_cols)
	    table.style = 'Table Grid'

	    # Fill in cells
	    for row_idx, row_data in enumerate(table_data):
	        row = table.rows[row_idx]
	        for col_idx, cell_text in enumerate(row_data):
	            if col_idx < len(row.cells):
	                cell = row.cells[col_idx]
	                # Clear existing content and add formatted text
	                cell.text = ''
	                paragraph = cell.paragraphs[0]
	                process_inline_formatting(paragraph, cell_text)

	                # Make header row bold
	                if row_idx == 0:
	                    for run in paragraph.runs:
	                        run.bold = True

	    # Add some space after the table
	    doc.add_paragraph()


	def process_markdown_cell(doc, content, base_path=None):
	    """Render one markdown cell into ``doc``, one source line at a time.

	    Handled line types, checked in this order: headings (leading ``#``),
	    fenced code blocks, blockquotes, bullet lists, numbered lists,
	    standalone images, horizontal rules, blank lines (skipped), pipe tables
	    and finally ordinary paragraphs.

	    Args:
	        doc: python-docx document being built.
	        content: Markdown source of the cell.
	        base_path: Directory used to resolve relative image paths.
	    """
	    image_pattern = r'^!\[([^\]]*)\]\(([^)]+)\)$'
	    ordered_pattern = r'^\d+\.\s'

	    lines = content.split('\n')
	    total = len(lines)
	    pos = 0

	    while pos < total:
	        line = lines[pos]
	        stripped = line.strip()
	        image_match = re.match(image_pattern, stripped)
	        # Branches that consume several lines move this further ahead
	        next_pos = pos + 1

	        if line.startswith('#'):
	            # Heading level = number of leading '#' characters, capped at 6
	            level = min(len(line) - len(line.lstrip('#')), 6)
	            heading = doc.add_heading(line[level:].strip(), level=level)
	            for run in heading.runs:
	                run.font.name = 'Poppins'

	        elif line.startswith('```'):
	            # Keep the fences (and language tag) visible in the output
	            code_lines = ['```' + line[3:].strip()]
	            while next_pos < total and not lines[next_pos].startswith('```'):
	                code_lines.append(lines[next_pos])
	                next_pos += 1
	            code_lines.append('```')
	            add_code_block(doc, code_lines)
	            next_pos += 1  # step over the closing fence

	        elif line.startswith('>'):
	            quote = doc.add_paragraph()
	            quote.paragraph_format.left_indent = Inches(0.5)
	            process_inline_formatting(quote, line[1:].strip())

	        elif stripped.startswith(('- ', '* ')):
	            bullet = doc.add_paragraph(style='List Bullet')
	            process_inline_formatting(bullet, stripped[2:])

	        elif re.match(ordered_pattern, stripped):
	            numbered = doc.add_paragraph(style='List Number')
	            process_inline_formatting(numbered, re.sub(ordered_pattern, '', stripped))

	        elif image_match:
	            add_image_with_alt(doc, image_match.group(2), image_match.group(1), base_path)

	        elif stripped in ('---', '***', '___'):
	            # Horizontal rule drawn as a row of box-drawing characters
	            doc.add_paragraph().add_run('─' * 50)

	        elif not stripped:
	            pass  # Skip empty lines

	        # Table rows begin with a plain pipe character
	        elif stripped.startswith('|'):
	            table_data, last_row = parse_markdown_table(lines, pos)
	            if table_data:
	                add_table_to_doc(doc, table_data)
	            next_pos = last_row + 1

	        else:
	            process_inline_formatting(doc.add_paragraph(), line)

	        pos = next_pos


	def process_code_cell(doc, source, outputs=None):
	    """Write a code cell as a visible ```` ```python ```` fenced block.

	    ``outputs`` is accepted but not used.
	    """
	    code_lines = ['```python'] + source.split('\n') + ['```']
	    add_code_block(doc, code_lines)


	def convert_notebook_to_docx(notebook_path, output_path=None):
	    """Convert a Jupyter notebook file into a Word document.

	    Args:
	        notebook_path: Path of the notebook to read (UTF-8, nbformat v4).
	        output_path: Destination file; defaults to the notebook path with a
	            ``.docx`` suffix.

	    Returns:
	        The output path as a ``Path``.
	    """
	    notebook_path = Path(notebook_path)

	    if output_path is None:
	        output_path = notebook_path.with_suffix('.docx')
	    else:
	        output_path = Path(output_path)

	    # Read notebook
	    with open(notebook_path, 'r', encoding='utf-8') as f:
	        nb = nbformat.read(f, as_version=4)

	    # Create document
	    doc = Document()
	    create_styles(doc)

	    # Set default font for Normal style
	    style = doc.styles['Normal']
	    style.font.name = 'Poppins'
	    style.font.size = Pt(11)

	    # Base path for resolving relative image paths
	    base_path = notebook_path.parent

	    # Process cells
	    for cell in nb.cells:
	        if cell.cell_type == 'markdown':
	            process_markdown_cell(doc, cell.source, base_path)
	        elif cell.cell_type == 'code':
	            process_code_cell(doc, cell.source, cell.get('outputs', []))

	    # Save document
	    doc.save(output_path)
	    print(f'Converted: {notebook_path} -> {output_path}')
	    return output_path


	def main():
	    """Command-line entry point: ``<notebook_path> [output_path]``.

	    Prints a usage line and exits with status 1 when no notebook path is
	    given.
	    """
	    if len(sys.argv) < 2:
	        print('Usage: python notebook_to_docx.py <notebook_path> [output_path]')
	        sys.exit(1)

	    notebook_path = sys.argv[1]
	    output_path = sys.argv[2] if len(sys.argv) > 2 else None

	    convert_notebook_to_docx(notebook_path, output_path)


	# Run the command-line entry point only when executed as a script.
	if __name__ == '__main__':
	    main()
No results found