AnirudhDagar · November 8, 2021 19:21
diff --git a/d2l-reference-compiler.py b/d2l-reference-compiler.py
 ##########################################
 # Requires the following modules:
 # d2lbook, bibtexparser, pybtex
 ##########################################


 import re
 from d2lbook import slides, notebook, markdown
 from collections import defaultdict
 import nbformat
 import os
 import glob
 import bibtexparser
 import logging
 import subprocess


 class PrepareNotebooks():
     """Discover chapter notebooks and derive their book order and numbering.

     Every path is resolved relative to the current working directory, which
     is expected to contain the root ``index.ipynb`` and the chapter
     directories.
     """

     def __init__(self):
         # Chapters listed here are numbered "0" and do not consume a
         # chapter number.
         self.unnumbered_chapters = ("chapter_preface", "chapter_installation",
                                     "chapter_notation", "chapter_references")

     def find_files(self):
         """
         glob and collect all ".ipynb" files that live inside a chapter
         directory.

         Returns
         -------
         chapter_fnames : list
             paths (as returned by glob) of all notebooks under a chapter.
             Note: Doesn't include ".ipynb" files in the root dir.

         chapter_files_dict : dict
             default dictionary containing keys as chapter names and
             values as lists of section names.
             Example: {
             "chapter_linear_networks": ["image-classification-dataset.ipynb", "index.ipynb", ...],
             "chapter_preliminaries": [...],
             ...
             }
             A notebook nested deeper than one directory is stored with its
             "/"-joined path relative to the chapter directory.

         """
         chapter_fnames = []
         # Use default dict to create a list in case the key is missing
         # when adding the file_name for the first time in a chapter
         chapter_files_dict = defaultdict(list)

         for full_name in glob.glob('**/*.ipynb', recursive=True):
             if not os.path.isfile(full_name):
                 continue
             # glob uses the OS separator; normalise so the split below does
             # not depend on the platform.
             normalized = full_name.replace(os.sep, "/")
             # Split on the FIRST separator only, so nested notebooks no
             # longer break a two-value unpack.
             chapter_name, _, file_name = normalized.partition("/")
             if not file_name or "chapter" not in chapter_name:
                 # Root-level notebooks (getting_started.ipynb, index.ipynb,
                 # ...) and notebooks outside a chapter directory are skipped.
                 continue
             chapter_fnames.append(full_name)
             chapter_files_dict[chapter_name].append(file_name)

         return chapter_fnames, chapter_files_dict

     def get_per_chapter_order(self):
         """
         Read the root and per-chapter index notebooks to order the chapters
         and the sections inside each chapter.

         Returns
         -------
         ordered_ch_dict : dict
             default dictionary containing keys as chapter names in ORDER
             OF THEIR SEQUENCE in the book and the values as lists of
             section names also in the ORDER OF THEIR SEQUENCE in the book.
             Example: {
             "chapter_preface": ['index.ipynb'],
             "chapter_installation": ['index.ipynb'],
             "chapter_notation": ['index.ipynb'],
             "chapter_introduction": ['index.ipynb'],
             "chapter_preliminaries": ['index.ipynb', 'ndarray.ipynb', 'pandas.ipynb', 'linear-algebra.ipynb', ...],
             "chapter_linear-networks": ['index.ipynb', 'linear-regression.ipynb', 'linear-regression-scratch.ipynb', ...],
             ...
             }

         chapter_files_dict : dict
             default dictionary containing keys as chapter names and
             values as lists of section names, as returned by find_files().

         """
         ordered_ch_dict = defaultdict(list)

         # Get all notebooks
         chapter_fnames, chapter_files_dict = self.find_files()

         # Parse the root 'index.ipynb' notebook for chapter ordering.
         root_nb = notebook.read("index.ipynb")
         for line in root_nb.cells[0].source.split('\n'):
             if "chapter" not in line:
                 continue
             link_text = line.split("]")[0]
             if "[" not in link_text:
                 # The line mentions "chapter" but carries no "[...]" link
                 # text, so there is no chapter name to extract.
                 continue
             ch_name = link_text.split("[")[1].split("/")[0]

             if len(chapter_files_dict[ch_name]) < 2:
                 # Only a single notebook present and
                 # no other sections in this chapter.
                 # No need to check the order; continue the loop.
                 # Eg: 'chapter_preface'
                 #     'chapter_introduction'
                 #     'chapter_installation'
                 #     'chapter_references' etc.
                 ordered_ch_dict[ch_name] = chapter_files_dict[ch_name]
                 continue

             ch_index_name = ch_name + "/" + "index.ipynb"
             if not os.path.isfile(ch_index_name):
                 # Without a chapter index there is no TOC to read; check
                 # BEFORE reading and fall back to a deterministic order.
                 logging.warning(
                     "%s not found; using alphabetical order for %s",
                     ch_index_name, ch_name)
                 ordered_ch_dict[ch_name] = sorted(chapter_files_dict[ch_name])
                 continue

             ch_index_nb = notebook.read(ch_index_name)
             ordered_ch_dict[ch_name] = ["index.ipynb"]

             # Use ":begin_tab:toc" and ":end_tab:" as markers to
             # start and end the search for section ordering.
             toc = False
             # Parse each chapter 'index.ipynb' notebook for section ordering.
             for toc_line in ch_index_nb.cells[0].source.split('\n'):
                 if ":begin_tab:toc" in toc_line:
                     toc = True
                     continue
                 if ":end_tab:" in toc_line:
                     break
                 # Lines inside the TOC that carry no "(target)" (e.g. blank
                 # lines) are ignored instead of raising IndexError.
                 if toc and "(" in toc_line:
                     file_name = toc_line.split("(")[1].replace(")", "").strip()
                     ordered_ch_dict[ch_name].append(file_name)

         return ordered_ch_dict, chapter_files_dict

     def validate_and_generate_numbering(self):
         """
         Validate that all notebooks are covered.
         Also add chapter numbers and subsection numbers to ordered_ch_dict.

         Returns
         -------
         ordered_ch_dict_with_num : dict
             dictionary containing keys as chapter names in ORDER and values
             as ``(chapter_number, [(section_number, notebook_name), ...])``
             where both numbers are strings, e.g.
             ``("2", [("2.0", "index.ipynb"), ("2.1", "ndarray.ipynb")])``.

         Raises
         ------
         AssertionError
             if a chapter has an ordered section list whose length differs
             from the number of notebooks found on disk for that chapter.
         """
         ordered_ch_dict, chapter_files_dict = self.get_per_chapter_order()
         ordered_ch_dict_with_num = {}
         next_idx = 1
         for key, value in ordered_ch_dict.items():
             if key in self.unnumbered_chapters:
                 # Unnumbered chapters get "0" and must NOT reset the running
                 # counter used by the chapters that follow them.
                 idx = 0
             else:
                 idx = next_idx
                 next_idx += 1
             print(idx, key, value)

             ordered_ch_dict_with_num[key] = (
                 str(idx),
                 [(str(idx) + "." + str(sub_idx), nb_name)
                  for sub_idx, nb_name in enumerate(value)])

             # Raised explicitly (not via `assert`) so the check still runs
             # under `python -O`.
             if len(value) > 0 and len(chapter_files_dict[key]) != len(value):
                 raise AssertionError(
                     f"{key}: {len(chapter_files_dict[key])} notebook(s) on disk "
                     f"but {len(value)} listed in the chapter order")

         return ordered_ch_dict_with_num


 # Step 1: work out the chapter/section order and numbering.
 # Runs at import time; `ordered_ch_dict_with_num` is a module-level global
 # that replace_references() further down reads directly.
 print("*"*40)
 print("Preparing Notebooks...\n")
 get_notebooks = PrepareNotebooks()

 # Maps chapter name -> (chapter number, [(section number, notebook name), ...]).
 ordered_ch_dict_with_num = get_notebooks.validate_and_generate_numbering()
 print("\n","*"*40,"\n")
 print("Done ")

 ######################################################################################################
 ################################# PARSE AND REPLACE CONTENT ##########################################
 ######################################################################################################


 print("\n","*"*40,"\n")
 print("Starting with Reading Notebooks...")
 print("\n","*"*40,"\n")

 # Our special mark in markdown, e.g. :label:`chapter_intro`
 # Group 1 is the mark name (e.g. "label"); group 2 is the optional
 # backtick-quoted argument, backticks included.
 # Written as a raw string: the previous non-raw literal relied on invalid
 # escape sequences (\/ \w \* ...), which emit SyntaxWarning on Python 3.12+.
 # The compiled pattern is unchanged.
 md_mark_pattern = re.compile(r':([-\/\._\w]+):(`[\ \*-\/\\._\w]+`)?')
 # Chapters whose headings are not prefixed with a section number.
 skip_num_chapters = ["chapter_preface", "chapter_notation", "chapter_installation"]


 class TableUpdater():
     """Build numbered link texts and anchor ids for one notebook.

     Parameters
     ----------
     subsec_num : str
         number of the notebook, "<chapter>.<index in chapter>".
     ch_name : str
         chapter directory name.
     subsec_name : str
         notebook file name inside the chapter directory.

     Numbers that contain ".0." (the notebook is the first one of its
     chapter) are collapsed, e.g. "1.0.3" becomes "1.3".
     """

     def __init__(self, subsec_num, ch_name, subsec_name):
         self.ch_name = ch_name
         self.subsec_name = subsec_name
         self.subsec_num = subsec_num
         # Relative link to this notebook as seen from a sibling chapter dir.
         self.path = "../" + ch_name + "/" + subsec_name

     def eq(self, eq_table, eq_key, per_subsec_eq_count):
         """Register equation `eq_key`; return (eq_table, anchor id).

         The returned id is the same anchor the stored link points to. The
         number is collapsed BEFORE link and id are built, so the two can
         no longer disagree and the notebook path is never rewritten.
         """
         eq_id_num = str(self.subsec_num) + "." + str(per_subsec_eq_count)
         eq_id_num = eq_id_num.replace(".0.", ".")
         eq_id = "eq" + eq_id_num
         eq_table[eq_key] = "[(" + eq_id_num + f")]({self.path}#{eq_id})"

         return eq_table, eq_id

     def fig(self, fig_table, fig_key, per_subsec_fig_count):
         """Register figure `fig_key`; return (fig_table, anchor id)."""
         fig_id_num = str(self.subsec_num) + "." + str(per_subsec_fig_count)

         # Special handle some sections: every preface figure is "Fig. 1"
         # and keeps its uncollapsed anchor id.
         if self.ch_name == "chapter_preface":
             fig_id = "fig" + fig_id_num
             fig_table[fig_key] = "[Fig. 1" + f"]({self.path}#{fig_id})"
             return fig_table, fig_id

         fig_id_num = fig_id_num.replace(".0.", ".")
         fig_id = "fig" + fig_id_num
         fig_table[fig_key] = "[Fig. " + fig_id_num + f"]({self.path}#{fig_id})"

         return fig_table, fig_id

     def table(self, table_table, table_key, per_subsec_table_count):
         """Register table `table_key`; return (table_table, anchor id)."""
         table_id_num = str(self.subsec_num) + "." + str(per_subsec_table_count)
         table_id_num = table_id_num.replace(".0.", ".")
         table_id = "table" + table_id_num
         # Special handle some sections: every preface table is "Table. 1".
         if self.ch_name == "chapter_preface":
             table_table[table_key] = "[Table. 1" + f"]({self.path}#{table_id})"
             return table_table, table_id
         table_table[table_key] = "[Table. " + table_id_num + f"]({self.path}#{table_id})"
         return table_table, table_id

     def header(self, head_table, sec_key, ref_name):
         """Register a top-level heading; return (head_table, number).

         The table entry is a pair: the numbered link ("Chapter N" for
         index.ipynb, "Section N.M" otherwise) and a link whose text is
         `ref_name`.
         """
         if "index.ipynb" == self.subsec_name:
             sec_id = self.subsec_num.split(".")[0]
             replacement_name = "[Chapter " + sec_id + f"]({self.path})"
         else:
             sec_id = self.subsec_num
             replacement_name = "[Section " + sec_id + f"]({self.path})"
         titled_link = "[" + ref_name + f"]({self.path})"
         head_table[sec_key] = (replacement_name, titled_link)
         return head_table, sec_id

     def sec(self, sec_table, sec_key, two_down, three_down, line_sec_name):
         """Register a "##"/"###" heading; return (sec_table, number).

         `line_sec_name` is the heading line itself; the text after the
         first "# " is the title. Reads the module-level `skip_num_chapters`.
         """
         sec_id = self.get_sec_id(two_down, three_down, 0)
         title = line_sec_name.split("# ")[1]
         slug = '-'.join(title.split(" "))
         if self.ch_name in skip_num_chapters:
             link_id = slug
         else:
             # Numbered chapters get the section number prefixed to the anchor.
             link_id = sec_id + "-" + slug
         replacement_sec_name = "[Section " + sec_id + f"]({self.path}#{link_id})"
         ref_name = "[" + title + f"]({self.path}#{link_id})"
         sec_table[sec_key] = replacement_sec_name, ref_name
         return sec_table, sec_id

     def get_sec_id(self, two_down, three_down, four_down):
         """Join the notebook number with the heading counters, dropping
         every ".0" component."""
         sec_id = ".".join([self.subsec_num, str(two_down), str(three_down), str(four_down)])
         return sec_id.replace(".0", "")

     def get_table_caption(self, lines, curr_line_num):
         """Search upwards from `curr_line_num` for the table caption.

         Returns (caption without its leading ":", number of lines the
         caption sits above `curr_line_num`). Lines starting with ":label:"
         are skipped. Returns None when no line starting with ":" is found.
         """
         for offset, line in enumerate(reversed(lines[:curr_line_num + 1])):
             if line.startswith(":label:"):
                 continue
             if line.startswith(":"):
                 return line[1:], offset

     def element_center_formatting(self, id, num, caption):
         """Return the centered, italic caption element carrying anchor `id`."""
         return f"<center id=\"{id}\"><i>" + num + " " + caption + "</i></center>"

     def element_eq_formatting(self, per_subsec_count, eq_id=None):
         """Return the right-floated equation number element.

         With `eq_id` the element carries that id and shows the id without
         its "eq" prefix; otherwise it shows the notebook number plus
         `per_subsec_count`, collapsed the same way as labeled equations.
         """
         if eq_id:
             # Remove the "eq" PREFIX; str.strip("eq") would strip a
             # character set from both ends instead.
             shown = eq_id[len("eq"):] if eq_id.startswith("eq") else eq_id
             return f"<i id=\"{eq_id}\" style=\"float: right\">" + "(" + shown + ")" + "</i><br/><br/>"
         shown = (str(self.subsec_num) + "." + str(per_subsec_count)).replace(".0.", ".")
         return "<i style=\"float: right\">" + "(" + shown + ")" + "</i><br/><br/>"


 def generate_tables_and_replace_unlabeled(ordered_ch_dict_with_num):
    """
    Number every equation, figure, table and heading of every notebook and
    collect the link text for each labeled element.

    Each notebook listed in `ordered_ch_dict_with_num` is read, its markdown
    cells are rewritten line by line and the notebook is written back to the
    same path (in place). Non-markdown cells are passed through untouched.

    Parameters
    ----------
    ordered_ch_dict_with_num : dict
        chapter name -> (chapter number, [(section number, notebook name), ...])

    Returns
    -------
    fig_table, eq_table, sec_table, head_table, table_table : dict
        each maps the label argument captured by `md_mark_pattern` (group 2)
        to the replacement link text built by TableUpdater. Entries of
        sec_table and head_table are pairs (numbered link, titled link).
        Note the return order differs from the order in which the tables
        are created below.
    """
    # A whole "$$ ... $$" equation on a single line.
    eq_match = re.compile("\$\$(.*?)\$\$")
    # A heading: "# " followed by non-digit characters up to the end of line.
    sec_match = re.compile("\#\s(\D*?)$")
    fig_table, table_table, eq_table, head_table, sec_table = {}, {}, {}, {}, {}
    # Incremented for every handled mark; never read or returned.
    count_match = 0

    for ch_name, value in ordered_ch_dict_with_num.items():
        for idx in range(len(value[1])):
            # All counters restart for every notebook.
            per_subsec_fig_count, per_subsec_table_count, per_subsec_eq_count = 0, 0, 0
            # Running counters for "##", "###" and "####" headings.
            two_down, three_down, four_down = 0, 0, 0
            # Number of bare "$$" lines seen since the last completed pair.
            dollar = 0
            subsec_name = value[1][idx][1]
            subsec_num = value[1][idx][0]
            new_cells = []
            sec_path = os.path.join(os.getcwd(), ch_name, subsec_name)
            nb = notebook.read(sec_path)
            updater = TableUpdater(subsec_num, ch_name, subsec_name)
            for cell in nb.cells:
                if cell.cell_type=='markdown':
                    md_cell = cell.source
                    lines = md_cell.split('\n')
                    for j, line in enumerate(lines):
                        # `line` is the text as read; `lines[j]` may already
                        # have been rewritten by an earlier step.
                        eq_unlabeled = eq_match.search(line)
                        sec_unlabeled = sec_match.search(line)
                        # NOTE(review): lines[j+1] raises IndexError when a
                        # one-line equation sits on the last line of a cell.
                        if eq_unlabeled is not None and ("eqlabel" not in lines[j+1]):
                            # Handle equations which are not labeled but
                            # during the numbering they are used
                            per_subsec_eq_count += 1
                            eq_num = updater.element_eq_formatting(per_subsec_eq_count)
                            lines[j] = lines[j] + eq_num
                        elif lines[j]=="$$":
                            dollar += 1
                            if dollar==2:
                                # Handle equations which have $$ syntax
                                # in lines above and below
                                per_subsec_eq_count += 1
                                dollar = 0
                                eq_num = updater.element_eq_formatting(per_subsec_eq_count)
                                lines[j] = "$$\n" + eq_num
                        # Decide whether this line is a heading WITHOUT a
                        # ":label:" on one of the following lines.
                        if len(lines)<2:
                            conditional = sec_unlabeled is not None
                        elif len(lines)==2:
                            conditional = sec_unlabeled is not None and (":label:" not in lines[1])
                        else:
                            try:
                                conditional = sec_unlabeled is not None and (":label:" not in lines[j+1]) and (":label:" not in lines[j+2])
                            except Exception as e:
                                # Some notebooks don't have pytorch sections but they still have headings
                                # These need to be handled explicitly. They have headers on the last line
                                # See example chapter computational performance
                                # (reached when lines[j+1] or lines[j+2] is past the end of the cell)
                                conditional = sec_unlabeled is not None and (":label:" not in lines[-1])
                        if conditional and ch_name not in skip_num_chapters:
                            # Unlabeled heading: advance the counter of its
                            # level, reset the deeper ones, prefix the number.
                            num_pounds = line.count("#")
                            if num_pounds == 2:
                                two_down += 1
                                three_down = 0
                                four_down = 0
                            if num_pounds == 3:
                                three_down += 1
                                four_down = 0
                            if num_pounds == 4:
                                four_down += 1
                            sec_id = updater.get_sec_id(two_down, three_down, four_down)
                            lines[j] = lines[j].replace("# ", f"# {sec_id} ")
                        m = md_mark_pattern.search(line)
                        # Only marks that END the line are handled here;
                        # reference marks are left for replace_references().
                        if (m is not None
                            and m[1] not in ('ref', 'numref', 'eqref')
                            and m.end() == len(line)):
                            count_match += 1
                            # Remove width
                            if m[1] == 'width':
                                lines[j] = ''
                            # Save equation labels
                            if m[1] == 'eqlabel':
                                per_subsec_eq_count += 1
                                eq_table, eq_id = updater.eq(eq_table, m[2], per_subsec_eq_count)
                                eq_label_num_right = updater.element_eq_formatting(per_subsec_eq_count, eq_id=eq_id)
                                lines[j] = lines[j].replace(m[0], eq_label_num_right)
                            # Save figure, sec, subsec, chap, tab labels
                            if m[1] == 'label':
                                # A label belongs to a heading when one of the
                                # two lines above it contains "# ".
                                # NOTE(review): for j == 0 (and j == 1 in the
                                # figure test below) the negative index wraps
                                # around to the END of the cell.
                                if j<2:
                                    conditional = len(lines[j-1]) > 0 and "# " in lines[j-1]
                                else:
                                    conditional =  (len(lines[j-1]) > 0 and "# " in lines[j-1]) or (len(lines[j-2]) > 0 and "# " in lines[j-2])
                                if (conditional):
                                    # Found chapter, section or subsection
                                    num_pounds = lines[j-1].count("#")
                                    if num_pounds == 0: num_pounds = lines[j-2].count("#")
                                    if num_pounds == 1:
                                        # Handling top Headers
                                        if "# " in lines[j-1]: ref_name = lines[j-1].split("# ")[1]
                                        else: ref_name = lines[j-2].split("# ")[1]
                                        head_table, head_id = updater.header(head_table, m[2], ref_name)
                                        # The label line itself is blanked.
                                        lines[j] = ''
                                        if ch_name not in skip_num_chapters:
                                            # Prefix the heading with its number.
                                            # `ref_name` is reassigned here but
                                            # not read again afterwards.
                                            if "# " in lines[j-1]:
                                                lines[j-1] = lines[j-1].replace("# ", f"# {head_id} ")
                                                ref_name = lines[j-1].split("# ")[1]
                                            else:
                                                lines[j-2] = lines[j-2].replace("# ", f"# {head_id} ")
                                                ref_name = lines[j-2].split("# ")[1]
                                    else:
                                        # Labeled "##"/"###" heading.
                                        # NOTE(review): unlike the unlabeled
                                        # branch above, four_down is not reset here.
                                        if num_pounds == 2:
                                            two_down += 1
                                            three_down = 0
                                        if num_pounds == 3:
                                            three_down += 1
                                        if "# " in lines[j-1]:
                                            sec_table, sec_id = updater.sec(sec_table, m[2], two_down, three_down, lines[j-1])
                                            lines[j] = ''
                                            if ch_name not in skip_num_chapters:
                                                lines[j-1] = lines[j-1].replace("# ", f"# {sec_id} ")
                                        else:
                                            sec_table, sec_id = updater.sec(sec_table, m[2], two_down, three_down, lines[j-2])
                                            lines[j] = ''
                                            if ch_name not in skip_num_chapters:
                                                lines[j-2] = lines[j-2].replace("# ", f"# {sec_id} ")
                                # Can't use the following logic to find figures
                                # since `lstm0`, `lstm1`... are also figures.
                                # if ('fig' in m[2]) or ('img' in m[2]):
                                # The if logic below checks for markdown img syntax
                                # in the two lines above
                                elif ("![" in lines[j-1]) or ("![" in lines[j-2]):
                                    if "![" in lines[j-1]:
                                        fig_caption_raw = lines[j-1]
                                    else:
                                        fig_caption_raw = lines[j-2]
                                    # Caption = the image alt text; strip("![")
                                    # removes leading/trailing "!" and "[" characters.
                                    fig_caption = fig_caption_raw.split("]")[0].strip("![")
                                    per_subsec_fig_count += 1
                                    # Add figure to fig_table dictionary
                                    fig_table, fig_id = updater.fig(fig_table, m[2], per_subsec_fig_count)
                                    # Replace labels of the figure
                                    fig_label_num = fig_table[m[2]].split("]")[0].replace("[", "")
                                    fig_label_num_centered = updater.element_center_formatting(fig_id, fig_label_num, fig_caption)
                                    lines[j] = lines[j].replace(m[0], fig_label_num_centered)
                                # NOTE(review): lines[j-1][0] raises IndexError
                                # when the line above the label is empty.
                                elif ("|"==lines[j-1][0]):
                                    # A markdown table row directly above the label.
                                    # NOTE(review): get_table_caption returns None
                                    # when no caption line is found, which makes
                                    # this unpacking fail with TypeError.
                                    table_caption, caption_line_num = updater.get_table_caption(lines, j)
                                    per_subsec_table_count += 1
                                    # Add table to table_table dictionary
                                    table_table, table_id = updater.table(table_table, m[2], per_subsec_table_count)
                                    # Replace labels of the table
                                    table_label_num = table_table[m[2]].split("]")[0].replace("[", "")
                                    table_label_num_centered = updater.element_center_formatting(table_id, table_label_num, table_caption)
                                    # The caption line is replaced by the numbered
                                    # caption; the label line becomes blank lines.
                                    lines[j-caption_line_num] = "\n" + table_label_num_centered
                                    lines[j] = "\n\n"
                                else:
                                    # Label that is neither heading, figure nor
                                    # table: sanity-check its name and report it.
                                    if ch_name not in skip_num_chapters:
                                        assert "fig" not in m[2]
                                        assert "img" not in m[2]
                                        assert "sec" not in m[2]
                                        assert "chap" not in m[2]
                                    print("0:", m[2])
                    md_cell = '\n'.join(lines)
                    new_cells.append(nbformat.v4.new_markdown_cell(md_cell))
                else:
                    new_cells.append(cell)
            new_nb = notebook.create_new_notebook(nb, new_cells)
            # Overwrite the notebook in place.
            with open(sec_path, 'w') as f:
                f.write(nbformat.writes(new_nb))
    return fig_table, eq_table, sec_table, head_table, table_table


 def shorten_replacement(replace_old, subsec_name, ch_name):
     """Turn a cross-notebook link into a same-page link when possible.

     Parameters
     ----------
     replace_old : str
         markdown link of the form "[text](../<chapter>/<notebook>#<anchor>)".
     subsec_name : str
         file name of the notebook the link is being written into.
     ch_name : str
         chapter directory of that notebook.

     Returns
     -------
     str
         "[text](#<anchor>)" when the link targets an anchor inside the very
         notebook it is written into; otherwise `replace_old` unchanged
         (different notebook, no anchor, or not a "../" link at all).
     """
     marker = "](../"
     marker_pos = replace_old.find(marker)
     if marker_pos == -1:
         return replace_old

     link_text = replace_old[:marker_pos + 1]          # "[text]"
     target = replace_old[marker_pos + len(marker):]   # "<chapter>/<notebook>#<anchor>)"

     # Split only on the FIRST "/" and the FIRST "#": the anchor is built
     # from heading text and may itself contain either character.
     ch_name_link, _, notebook_and_anchor = target.partition("/")
     subsec_name_link, hash_sep, anchor = notebook_and_anchor.partition("#")

     if hash_sep and subsec_name == subsec_name_link and ch_name == ch_name_link:
         # `anchor` still carries the closing ")" of the original link.
         return link_text + "(#" + anchor
     return replace_old


 def get_citations():
     """
     Format d2l.bib with pybtex, build the citation table and rewrite the
     references notebook.

     Reads ``d2l.bib`` from the current directory, writes (and afterwards
     removes) the temporary file ``out.txt`` and overwrites
     ``chapter_references/zreferences.ipynb`` with one ``<p id=KEY>`` element
     per formatted reference.

     Returns
     -------
     cite_table : dict
         bibtex key -> "[label](../chapter_references/zreferences.ipynb#key)"

     Exits the process with status -1 when pybtex-format cannot be run or
     reports a failure.
     """
     import sys  # local import: only needed for the error exits below

     # Generate temporary file out.txt from d2l.bib
     # containing formatted references output.
     # Passed as an argument list so that no shell is involved.
     cmd = ["pybtex-format", "--label-style", "apa", "--abbreviate-names",
            "d2l.bib", "out.txt"]
     try:
         process = subprocess.run(cmd, stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
     except OSError as err:
         # e.g. pybtex-format is not installed / not on PATH
         logging.error('%s', err)
         sys.exit(-1)
     if process.returncode != 0:
         logging.error('%s', process.stderr.decode())
         sys.exit(-1)

     with open("out.txt", "r") as ref_file:
         new_lines = ref_file.readlines()

     try:
         with open('d2l.bib', 'r') as bibtex_file:
             d2l_bib_db = bibtexparser.load(bibtex_file)

         # NOTE(review): the pairing below assumes that out.txt holds exactly
         # one line per bib entry, in the same order as the entries of
         # d2l.bib - confirm against the pybtex-format output.
         if len(new_lines) != len(d2l_bib_db.entries):
             logging.warning(
                 "out.txt has %d lines but d2l.bib has %d entries; "
                 "the surplus is ignored",
                 len(new_lines), len(d2l_bib_db.entries))

         # Create cite_table
         cite_table = {}
         for i, (line, entries) in enumerate(zip(new_lines, d2l_bib_db.entries)):
             key = entries["ID"]
             # Everything up to the first "]" of the line is used as the label.
             cite_name = line.split("]")[0] + "]"
             link = f"(../chapter_references/zreferences.ipynb#{key})"
             cite_table[key] = cite_name + link
             new_lines[i] = f"<p id={key}>{line}</p>"

         print("Citations Table Created!")
     finally:
         # Remove the temporary file even when building the table failed.
         os.remove("out.txt")
     print("Citations temporary file removed!")

     sec_references_path = os.path.join(os.getcwd(), "chapter_references", "zreferences.ipynb")
     nb = notebook.read(sec_references_path)
     # The references notebook is replaced by a single markdown cell.
     md_cell = '\n'.join(new_lines)
     new_cells = [nbformat.v4.new_markdown_cell(md_cell)]
     new_nb = notebook.create_new_notebook(nb, new_cells)
     with open(sec_references_path, 'w') as f:
         f.write(nbformat.writes(new_nb))

     return cite_table


 def replace_references(fig_table, eq_table, sec_table, head_table, table_table, cite_table,
                        ordered_chapters=None):
     """
     Replace every :numref:, :eqref:, :ref: and :cite: mark in all notebooks
     by the link text collected in the tables. Notebooks are rewritten in
     place; non-markdown cells are passed through untouched.

     Parameters
     ----------
     fig_table, eq_table, table_table : dict
         label (backticks included) -> link text.
     sec_table, head_table : dict
         label (backticks included) -> (numbered link, titled link).
         :numref: uses the numbered link, :ref: the titled one.
     cite_table : dict
         bibtex key (no backticks) -> link text.
     ordered_chapters : dict, optional
         chapter name -> (chapter number, [(section number, notebook name), ...]).
         Defaults to the module-level `ordered_ch_dict_with_num`, which is
         what this function always used before the parameter existed.

     A mark whose label is in none of the tables is left untouched. A :cite:
     mark with an unknown key is left untouched and reported with a warning.
     """
     if ordered_chapters is None:
         ordered_chapters = ordered_ch_dict_with_num

     # Optional backtick-quoted argument shared by all four marks. Raw string:
     # the previous non-raw literals relied on invalid escape sequences,
     # which emit SyntaxWarning on Python 3.12+. The patterns are unchanged.
     label_arg = r'(`[\ \*-\/\\._\w]+`)?'
     md_mark_pattern_ref = re.compile(':ref:' + label_arg)
     md_mark_pattern_numref = re.compile(':numref:' + label_arg)
     md_mark_pattern_eqref = re.compile(':eqref:' + label_arg)
     md_mark_pattern_cite = re.compile(':cite:' + label_arg)

     # (table, index into the table entry or None when the entry is the link)
     numref_sources = ((fig_table, None), (sec_table, 0),
                       (head_table, 0), (table_table, None))
     ref_sources = ((sec_table, 1), (head_table, 1))

     for ch_name, value in ordered_chapters.items():
         for _subsec_num, subsec_name in value[1]:
             sec_path = os.path.join(os.getcwd(), ch_name, subsec_name)
             new_cells = []
             nb = notebook.read(sec_path)
             for cell in nb.cells:
                 if cell.cell_type != 'markdown':
                     new_cells.append(cell)
                     continue
                 lines = cell.source.split('\n')
                 for j, line in enumerate(lines):
                     # All marks are collected from the line as read, before
                     # any replacement is applied to lines[j].
                     m_all_numref = md_mark_pattern_numref.findall(line)
                     m_all_ref = md_mark_pattern_ref.findall(line)
                     m_all_eqref = md_mark_pattern_eqref.findall(line)
                     m_all_cite = md_mark_pattern_cite.findall(line)
                     for m in m_all_numref:
                         for table, part in numref_sources:
                             if m in table:
                                 entry = table[m] if part is None else table[m][part]
                                 replacement = shorten_replacement(entry, subsec_name, ch_name)
                                 lines[j] = lines[j].replace(":numref:" + m, replacement)
                     for m in m_all_eqref:
                         if m in eq_table:
                             replacement = shorten_replacement(eq_table[m], subsec_name, ch_name)
                             lines[j] = lines[j].replace(":eqref:" + m, replacement)
                     for m in m_all_ref:
                         for table, part in ref_sources:
                             if m in table:
                                 replacement = shorten_replacement(table[m][part], subsec_name, ch_name)
                                 lines[j] = lines[j].replace(":ref:" + m, replacement)
                     for m in m_all_cite:
                         if not m:
                             # ":cite:" without a backtick-quoted key.
                             continue
                         # One mark may cite several comma-separated keys.
                         cited_keys = [key.strip() for key in m.strip("`").split(",")]
                         unknown = [key for key in cited_keys if key not in cite_table]
                         if unknown:
                             logging.warning("%s: unknown citation key(s) %s in %s",
                                             sec_path, unknown, m)
                             continue
                         replacement = ','.join(cite_table[key] for key in cited_keys)
                         lines[j] = lines[j].replace(":cite:" + m, replacement)
                 md_cell = '\n'.join(lines)
                 new_cells.append(nbformat.v4.new_markdown_cell(md_cell))
             new_nb = notebook.create_new_notebook(nb, new_cells)
             # Overwrite the notebook in place.
             with open(sec_path, 'w') as f:
                 f.write(nbformat.writes(new_nb))


 # Step 2: number all elements and collect the link text of every label.
 # The notebooks are rewritten in place by this call.
 fig_table, eq_table, sec_table, head_table, table_table = generate_tables_and_replace_unlabeled(ordered_ch_dict_with_num)
 # Step 3: build the citation table and rewrite the references notebook.
 cite_table = get_citations()

 # Step 4: replace the reference marks in all notebooks (second in-place pass).
 replace_references(fig_table, eq_table, sec_table, head_table, table_table, cite_table)

 print("Done!")
	##########################################
	# Requires the following modules:
	# d2lbook, bibtexparser, pybtex
	##########################################


	import re
	from d2lbook import slides, notebook, markdown
	from collections import defaultdict
	import nbformat
	import os
	import glob
	import bibtexparser
	import logging
	import subprocess


	class PrepareNotebooks():
	def __init__(self):
	self.unnumbered_chapters = ("chapter_preface", "chapter_installation",
	"chapter_notation", "chapter_references")

	def find_files(self):
	"""
	glob and collect all ".ipynb" files.

	Returns
	-------
	chapter_fnames : list
	list of all files under a particular chapter.
	Note: Doesn't include ".ipynb" files in the root dir.

	chapter_files_dict : dict
	default dictionary containing keys as chapter names and
	values as lists of section names.
	Example: {
	"chapter_linear_networks": ["image-classification-dataset.ipynb", "index.ipynb", ...],
	"chapter_preliminaries": [...],
	...
	}

	"""
	chapter_fnames = []
	# Use default dict to create a list in case the key is missing
	# when adding the file_name for the first time in a chapter
	chapter_files_dict = defaultdict(list)

	for full_name in glob.glob('*/.ipynb', recursive=True):
	if os.path.isfile(full_name):
	if "chapter" in full_name:
	# This filters out getting_started.ipynb, index.ipynb
	# in the root etc. Only append notebooks which are part
	# of some chapter
	chapter_fnames.append(full_name)
	chapter_name, file_name = full_name.split("/")
	chapter_files_dict[chapter_name].append(file_name)

	return chapter_fnames, chapter_files_dict

	def get_per_chapter_order(self):
	    """
	    Work out the book order of the chapters and of their sections.

	    The chapter order is read from the first cell of the root
	    'index.ipynb'. For chapters with two or more notebooks the
	    section order is read from the lines between ':begin_tab:toc'
	    and ':end_tab:' in the first cell of the chapter's 'index.ipynb'.

	    Returns
	    -------
	    ordered_ch_dict : dict
	        default dictionary containing keys as chapter names in ORDER
	        OF THEIR SEQUENCE in the book and the values as lists of
	        section names also in the ORDER OF THEIR SEQUENCE in the book.
	        Example: {
	        "chapter_preface": ['index.ipynb'],
	        "chapter_installation": ['index.ipynb'],
	        "chapter_notation": ['index.ipynb'],
	        "chapter_introduction": ['index.ipynb'],
	        "chapter_preliminaries": ['index.ipynb', 'ndarray.ipynb', 'pandas.ipynb', ...],
	        "chapter_linear-networks": ['index.ipynb', 'linear-regression.ipynb', ...],
	        ...
	        }

	    chapter_files_dict : dict
	        default dictionary containing keys as chapter names and
	        values as lists of section names, as returned by find_files().
	        Example: {
	        "chapter_linear_networks": ["image-classification-dataset.ipynb", "index.ipynb", ...],
	        "chapter_preliminaries": [...],
	        ...
	        }

	    """
	    ordered_ch_dict = defaultdict(list)

	    # Only the chapter -> files mapping is needed here.
	    _, chapter_files_dict = self.find_files()

	    # Parse the root 'index.ipynb' notebook for chapter ordering.
	    root_nb = notebook.read("index.ipynb")
	    for root_line in root_nb.cells[0].source.split('\n'):
	        if "chapter" not in root_line:
	            continue
	        link_text = root_line.split("]")[0]
	        if "[" not in link_text:
	            # The line mentions a chapter but is not a '[name](target)'
	            # link, so there is no chapter name to extract.
	            continue
	        ch_name = link_text.split("[")[1].split("/")[0]

	        if len(chapter_files_dict[ch_name]) < 2:
	            # Only a single notebook present and no other sections in
	            # this chapter, so there is no order to look up.
	            # Eg: 'chapter_preface', 'chapter_introduction',
	            # 'chapter_installation', 'chapter_references' etc.
	            ordered_ch_dict[ch_name] = list(chapter_files_dict[ch_name])
	            continue

	        ch_index_name = ch_name + "/" + "index.ipynb"
	        # Check for the index BEFORE reading it; the check is useless
	        # once the read has already been attempted.
	        if not os.path.isfile(ch_index_name):
	            logging.warning(
	                "%s not found; falling back to sorted file names for %s",
	                ch_index_name, ch_name)
	            ordered_ch_dict[ch_name] = sorted(chapter_files_dict[ch_name])
	            continue

	        ordered_ch_dict[ch_name] = ["index.ipynb"]
	        ch_index_nb = notebook.read(ch_index_name)

	        # Use ":begin_tab:toc" and ":end_tab:" as markers to
	        # start and end the search for section ordering.
	        in_toc = False
	        for toc_line in ch_index_nb.cells[0].source.split('\n'):
	            if ":begin_tab:toc" in toc_line:
	                in_toc = True
	                continue
	            if ":end_tab:" in toc_line:
	                break
	            # Blank or non-link lines inside the block carry no target.
	            if in_toc and "(" in toc_line:
	                file_name = toc_line.split("(")[1].replace(")", "")
	                ordered_ch_dict[ch_name].append(file_name)

	    return ordered_ch_dict, chapter_files_dict

	def validate_and_generate_numbering(self):
	    """
	    Validate that all notebooks are covered.
	    Also add chapter numbers and subsection numbers to ordered_ch_dict.

	    Chapters listed in ``self.unnumbered_chapters`` get chapter number 0
	    and do not consume a number, wherever they appear in the book.

	    Returns
	    -------
	    ordered_ch_dict_with_num : dict
	        dictionary containing keys as chapter names in ORDER and values
	        as ``(chapter_number, [(section_number, notebook_name), ...])``
	        where both numbers are strings, e.g. ``("3", [("3.0",
	        "index.ipynb"), ("3.1", "ndarray.ipynb")])``.

	    Raises
	    ------
	    AssertionError
	        If a chapter has an ordered section list whose length differs
	        from the number of notebooks found for that chapter.
	    """
	    ordered_ch_dict, chapter_files_dict = self.get_per_chapter_order()
	    ordered_ch_dict_with_num = {}
	    next_number = 1
	    for ch_name, sections in ordered_ch_dict.items():
	        is_numbered = ch_name not in self.unnumbered_chapters
	        # Keep the running counter separate from the assigned number so
	        # an unnumbered chapter cannot reset the numbering that follows.
	        ch_idx = next_number if is_numbered else 0
	        print(ch_idx, ch_name, sections)

	        ordered_ch_dict_with_num[ch_name] = (
	            str(ch_idx),
	            [(str(ch_idx) + "." + str(sub_idx), nb_name)
	             for sub_idx, nb_name in enumerate(sections)],
	        )

	        # Raised explicitly so the check survives `python -O`.
	        if sections and len(chapter_files_dict[ch_name]) != len(sections):
	            raise AssertionError(
	                f"{ch_name}: {len(chapter_files_dict[ch_name])} notebooks "
	                f"found but {len(sections)} listed in its table of contents")
	        if is_numbered:
	            next_number += 1

	    return ordered_ch_dict_with_num


	# Banner lines are 40 asterisks.
	print("*" * 40)
	print("Preparing Notebooks...\n")
	get_notebooks = PrepareNotebooks()

	# Chapter name -> (chapter number, [(section number, notebook name), ...]);
	# read as a module-level global by the functions defined further down.
	ordered_ch_dict_with_num = get_notebooks.validate_and_generate_numbering()
	print("\n", "*" * 40, "\n")
	print("Done ")

	######################################################################################################
	################################# PARSE AND REPLACE CONTENT ##########################################
	######################################################################################################


	print("\n", "*" * 40, "\n")
	print("Starting with Reading Notebooks...")
	print("\n", "*" * 40, "\n")

	# Our special mark in markdown, e.g. :label:`chapter_intro`
	# Group 1 is the mark name, group 2 the optional backtick-quoted argument.
	md_mark_pattern = re.compile(':([-\/\\._\w]+):(`[\ \*-\/\\\._\w]+`)?')
	# Chapters whose headings must not be prefixed with a section number.
	skip_num_chapters = ["chapter_preface", "chapter_notation", "chapter_installation"]


	class TableUpdater():
	    """
	    Build reference-table entries and HTML snippets for one notebook.

	    Every link produced points at ``../<ch_name>/<subsec_name>`` followed
	    by an optional ``#anchor``.

	    Parameters
	    ----------
	    subsec_num : str
	        Number of the notebook inside the book, e.g. "3.2".
	    ch_name : str
	        Chapter directory name, e.g. "chapter_preliminaries".
	    subsec_name : str
	        Notebook file name inside the chapter, e.g. "ndarray.ipynb".
	    """

	    def __init__(self, subsec_num, ch_name, subsec_name):
	        self.ch_name = ch_name
	        self.subsec_name = subsec_name
	        self.subsec_num = subsec_num
	        self.path = "../" + ch_name + "/" + subsec_name

	    @staticmethod
	    def _drop_zero_component(number):
	        """Collapse a zero section component, e.g. "3.0.1" -> "3.1"."""
	        return number.replace(".0.", ".")

	    def eq(self, eq_table, eq_key, per_subsec_eq_count):
	        """
	        Register a labeled equation.

	        Stores the markdown link for ``eq_key`` in ``eq_table`` and returns
	        ``(eq_table, eq_id)`` where ``eq_id`` is the anchor the link points
	        to, so the caller can attach the same id to the equation.
	        """
	        # Normalise the number before building BOTH the link and the id,
	        # so the link can never point at an anchor that does not exist.
	        eq_id_num = self._drop_zero_component(
	            str(self.subsec_num) + "." + str(per_subsec_eq_count))
	        eq_id = "eq" + eq_id_num
	        eq_table[eq_key] = "[(" + eq_id_num + f")]({self.path}#{eq_id})"

	        return eq_table, eq_id

	    def fig(self, fig_table, fig_key, per_subsec_fig_count):
	        """
	        Register a labeled figure.

	        Stores the markdown link for ``fig_key`` in ``fig_table`` and
	        returns ``(fig_table, fig_id)``. Figures in "chapter_preface" are
	        always displayed as "Fig. 1".
	        """
	        fig_id_num = str(self.subsec_num) + "." + str(per_subsec_fig_count)
	        fig_id = "fig" + fig_id_num

	        # Special handle some sections
	        if self.ch_name == "chapter_preface":
	            fig_table[fig_key] = "[Fig. 1" + f"]({self.path}#{fig_id})"
	            return fig_table, fig_id

	        fig_id_num = self._drop_zero_component(fig_id_num)
	        fig_id = "fig" + fig_id_num
	        fig_table[fig_key] = "[Fig. " + fig_id_num + f"]({self.path}#{fig_id})"

	        return fig_table, fig_id

	    def table(self, table_table, table_key, per_subsec_table_count):
	        """
	        Register a labeled table.

	        Stores the markdown link for ``table_key`` in ``table_table`` and
	        returns ``(table_table, table_id)``. Tables in "chapter_preface"
	        are always displayed as "Table. 1".
	        """
	        table_id_num = self._drop_zero_component(
	            str(self.subsec_num) + "." + str(per_subsec_table_count))
	        table_id = "table" + table_id_num
	        # Special handle some sections
	        if self.ch_name == "chapter_preface":
	            table_table[table_key] = "[Table. 1" + f"]({self.path}#{table_id})"
	            return table_table, table_id
	        table_table[table_key] = "[Table. " + table_id_num + f"]({self.path}#{table_id})"
	        return table_table, table_id

	    def header(self, head_table, sec_key, ref_name):
	        """
	        Register the top-level heading of the notebook.

	        ``head_table[sec_key]`` becomes ``(numbered_link, titled_link)``.
	        The numbered link reads "Chapter N" for "index.ipynb" and
	        "Section N.M" for any other notebook; the titled link uses
	        ``ref_name`` as its text. Returns ``(head_table, sec_id)``.
	        """
	        if "index.ipynb" == self.subsec_name:
	            sec_id = self.subsec_num.split(".")[0]
	            replacement_name = "[Chapter " + sec_id + f"]({self.path})"
	        else:
	            sec_id = self.subsec_num
	            replacement_name = "[Section " + sec_id + f"]({self.path})"
	        titled_link = "[" + ref_name + f"]({self.path})"
	        head_table[sec_key] = (replacement_name, titled_link)
	        return head_table, sec_id

	    def sec(self, sec_table, sec_key, two_down, three_down, line_sec_name):
	        """
	        Register a labeled sub-heading.

	        ``line_sec_name`` is the raw heading line; the text after "# " is
	        its title. ``sec_table[sec_key]`` becomes ``(numbered_link,
	        titled_link)``. The anchor is the title with spaces replaced by
	        "-", prefixed with the section number unless the chapter is in the
	        module-level ``skip_num_chapters`` list. Returns
	        ``(sec_table, sec_id)``.
	        """
	        sec_id = self.get_sec_id(two_down, three_down, 0)
	        title = line_sec_name.split("# ")[1]
	        slug = title.replace(" ", "-")
	        if self.ch_name in skip_num_chapters:
	            link_id = slug
	        else:
	            link_id = sec_id + "-" + slug
	        replacement_sec_name = "[Section " + sec_id + f"]({self.path}#{link_id})"
	        ref_name = "[" + title + f"]({self.path}#{link_id})"
	        sec_table[sec_key] = replacement_sec_name, ref_name
	        return sec_table, sec_id

	    def get_sec_id(self, two_down, three_down, four_down):
	        """
	        Join the notebook number with the three heading counters.

	        Every ".0" is removed from the dotted result, so unused (zero)
	        levels disappear: ("3.2", 1, 0, 0) -> "3.2.1".
	        """
	        sec_id = ".".join([self.subsec_num, str(two_down), str(three_down), str(four_down)])
	        return sec_id.replace(".0", "")

	    def get_table_caption(self, lines, curr_line_num):
	        """
	        Search upwards from ``curr_line_num`` for a table caption line.

	        A caption is the nearest line at or above ``curr_line_num`` that
	        starts with ":" but not with ":label:". Returns ``(caption,
	        offset)`` where ``caption`` is the line without its leading ":"
	        and ``offset`` is how many lines above ``curr_line_num`` it was
	        found, or ``None`` when there is no such line.
	        """
	        for offset, text in enumerate(reversed(lines[:curr_line_num + 1])):
	            if text.startswith(":label:"):
	                continue
	            if text.startswith(":"):
	                return text[1:], offset
	        return None

	    def element_center_formatting(self, id, num, caption):
	        """Return a centered, italic "<num> <caption>" element with anchor ``id``."""
	        return f"<center id=\"{id}\"><i>" + num + " " + caption + "</i></center>"

	    def element_eq_formatting(self, per_subsec_count, eq_id=None):
	        """
	        Return the right-floated "(number)" tag appended to an equation.

	        With ``eq_id`` (as returned by ``eq``) the tag carries that id and
	        shows the id without its "eq" prefix. Without it the number is
	        built from the notebook number and ``per_subsec_count``, with a
	        zero section component collapsed as for labeled equations.
	        """
	        if eq_id:
	            number = eq_id[len("eq"):] if eq_id.startswith("eq") else eq_id
	            return f"<i id=\"{eq_id}\" style=\"float: right\">" + "(" + number + ")" + "</i><br/><br/>"
	        number = self._drop_zero_component(
	            str(self.subsec_num) + "." + str(per_subsec_count))
	        return "<i style=\"float: right\">" + "(" + number + ")" + "</i><br/><br/>"


	def generate_tables_and_replace_unlabeled(ordered_ch_dict_with_num):
	    """
	    Number headings, figures, tables and equations in every notebook.

	    Each notebook listed in ``ordered_ch_dict_with_num`` is read, its
	    markdown cells are rewritten (numbers are inserted; ``:label:``,
	    ``:eqlabel:`` and ``:width:`` marks are replaced or blanked) and the
	    notebook is written back to the same path. Non-markdown cells are
	    kept as they are.

	    Parameters
	    ----------
	    ordered_ch_dict_with_num : dict
	        Maps a chapter name to ``(chapter_number, sections)`` where
	        ``sections`` is a list of ``(section_number, notebook_name)``.

	    Returns
	    -------
	    fig_table, eq_table, sec_table, head_table, table_table : dict
	        Lookup tables keyed by the label argument exactly as matched by
	        ``md_mark_pattern`` (backticks included). Values are the
	        replacement links produced by ``TableUpdater``.

	    Raises
	    ------
	    ValueError
	        If a labeled table has no ':caption' line above it.
	    """
	    eq_match = re.compile("\$\$(.*?)\$\$")
	    sec_match = re.compile("\#\s(\D*?)$")
	    fig_table, table_table, eq_table, head_table, sec_table = {}, {}, {}, {}, {}

	    for ch_name, value in ordered_ch_dict_with_num.items():
	        numbered = ch_name not in skip_num_chapters
	        for subsec_num, subsec_name in value[1]:
	            # Counters restart for every notebook.
	            fig_count, table_count, eq_count = 0, 0, 0
	            two_down, three_down, four_down = 0, 0, 0
	            dollar = 0
	            new_cells = []
	            sec_path = os.path.join(os.getcwd(), ch_name, subsec_name)
	            nb = notebook.read(sec_path)
	            updater = TableUpdater(subsec_num, ch_name, subsec_name)
	            for cell in nb.cells:
	                if cell.cell_type != 'markdown':
	                    new_cells.append(cell)
	                    continue
	                lines = cell.source.split('\n')
	                for j, line in enumerate(lines):
	                    # ---- equations without an :eqlabel: ----
	                    # An equation on the last line has no following line,
	                    # hence no label.
	                    next_line = lines[j + 1] if j + 1 < len(lines) else ""
	                    if eq_match.search(line) is not None and "eqlabel" not in next_line:
	                        # Handle equations which are not labeled but
	                        # still take part in the numbering.
	                        eq_count += 1
	                        lines[j] = lines[j] + updater.element_eq_formatting(eq_count)
	                    elif lines[j] == "$$":
	                        dollar += 1
	                        if dollar == 2:
	                            # Handle equations which have $$ syntax
	                            # in lines above and below
	                            eq_count += 1
	                            dollar = 0
	                            lines[j] = "$$\n" + updater.element_eq_formatting(eq_count)

	                    # ---- headings without a :label: ----
	                    if sec_match.search(line) is None:
	                        unlabeled_heading = False
	                    elif len(lines) < 2:
	                        unlabeled_heading = True
	                    elif len(lines) == 2:
	                        unlabeled_heading = ":label:" not in lines[1]
	                    else:
	                        try:
	                            unlabeled_heading = (":label:" not in lines[j + 1]
	                                                 and ":label:" not in lines[j + 2])
	                        except IndexError:
	                            # Some notebooks don't have pytorch sections but they still have
	                            # headings, which then sit on (or next to) the last line.
	                            # See example chapter computational performance
	                            unlabeled_heading = ":label:" not in lines[-1]
	                    if unlabeled_heading and numbered:
	                        num_pounds = line.count("#")
	                        if num_pounds == 2:
	                            two_down += 1
	                            three_down = 0
	                            four_down = 0
	                        elif num_pounds == 3:
	                            three_down += 1
	                            four_down = 0
	                        elif num_pounds == 4:
	                            four_down += 1
	                        sec_id = updater.get_sec_id(two_down, three_down, four_down)
	                        lines[j] = lines[j].replace("# ", f"# {sec_id} ")

	                    # ---- marks that end the line ----
	                    mark = md_mark_pattern.search(line)
	                    if (mark is None
	                            or mark[1] in ('ref', 'numref', 'eqref')
	                            or mark.end() != len(line)):
	                        continue

	                    # Remove width
	                    if mark[1] == 'width':
	                        lines[j] = ''
	                        continue

	                    # Save equation labels
	                    if mark[1] == 'eqlabel':
	                        eq_count += 1
	                        eq_table, eq_id = updater.eq(eq_table, mark[2], eq_count)
	                        eq_tag = updater.element_eq_formatting(eq_count, eq_id=eq_id)
	                        lines[j] = lines[j].replace(mark[0], eq_tag)
	                        continue

	                    if mark[1] != 'label':
	                        continue

	                    # Save figure, sec, subsec, chap, tab labels.
	                    # The labeled element is looked for on the one or two
	                    # lines above the label; lines that do not exist count
	                    # as empty instead of wrapping to the end of the cell.
	                    prev_line = lines[j - 1] if j >= 1 else ""
	                    prev_prev_line = lines[j - 2] if j >= 2 else ""

	                    if "# " in prev_line or "# " in prev_prev_line:
	                        # Found chapter, section or subsection
	                        heading_idx = j - 1 if "# " in prev_line else j - 2
	                        heading = lines[heading_idx]
	                        num_pounds = heading.count("#")
	                        if num_pounds == 1:
	                            # Handling top Headers
	                            ref_name = heading.split("# ")[1]
	                            head_table, number = updater.header(head_table, mark[2], ref_name)
	                        else:
	                            # Reset the deeper counters exactly like the
	                            # unlabeled-heading path above does.
	                            if num_pounds == 2:
	                                two_down += 1
	                                three_down = 0
	                                four_down = 0
	                            elif num_pounds == 3:
	                                three_down += 1
	                                four_down = 0
	                            sec_table, number = updater.sec(
	                                sec_table, mark[2], two_down, three_down, heading)
	                        lines[j] = ''
	                        if numbered:
	                            lines[heading_idx] = heading.replace("# ", f"# {number} ")
	                    # Can't use the label text to find figures
	                    # since `lstm0`, `lstm1`... are also figures.
	                    # Instead check for markdown img syntax
	                    # in the two lines above
	                    elif "![" in prev_line or "![" in prev_prev_line:
	                        fig_caption_raw = prev_line if "![" in prev_line else prev_prev_line
	                        fig_caption = fig_caption_raw.split("]")[0].strip("![")
	                        fig_count += 1
	                        # Add figure to fig_table dictionary
	                        fig_table, fig_id = updater.fig(fig_table, mark[2], fig_count)
	                        # Replace labels of the figure
	                        fig_label_num = fig_table[mark[2]].split("]")[0].replace("[", "")
	                        fig_tag = updater.element_center_formatting(fig_id, fig_label_num, fig_caption)
	                        lines[j] = lines[j].replace(mark[0], fig_tag)
	                    # A label directly below a markdown table row.
	                    elif prev_line.startswith("|"):
	                        caption_info = updater.get_table_caption(lines, j)
	                        if caption_info is None:
	                            raise ValueError(
	                                f"{sec_path}: table labeled {mark[2]} has no ':caption' line")
	                        table_caption, caption_line_num = caption_info
	                        table_count += 1
	                        # Add table to table_table dictionary
	                        table_table, table_id = updater.table(table_table, mark[2], table_count)
	                        # Replace labels of the table
	                        table_label_num = table_table[mark[2]].split("]")[0].replace("[", "")
	                        table_tag = updater.element_center_formatting(
	                            table_id, table_label_num, table_caption)
	                        # The numbered caption replaces the ':caption' line
	                        # and the label line is blanked.
	                        lines[j - caption_line_num] = "\n" + table_tag
	                        lines[j] = "\n\n"
	                    elif numbered:
	                        # Unrecognised label: make sure it is not one of the
	                        # kinds that should have been handled above.
	                        assert "fig" not in mark[2]
	                        assert "img" not in mark[2]
	                        assert "sec" not in mark[2]
	                        assert "chap" not in mark[2]
	                        print("0:", mark[2])
	                new_cells.append(nbformat.v4.new_markdown_cell('\n'.join(lines)))
	            new_nb = notebook.create_new_notebook(nb, new_cells)
	            with open(sec_path, 'w') as f:
	                f.write(nbformat.writes(new_nb))
	    return fig_table, eq_table, sec_table, head_table, table_table


	def shorten_replacement(replace_old, subsec_name, ch_name):
	    """
	    Turn a cross-notebook link into an in-page link when possible.

	    Parameters
	    ----------
	    replace_old : str
	        Markdown link of the form ``[text](../<chapter>/<notebook>#<anchor>)``.
	    subsec_name : str
	        File name of the notebook the link is being inserted into.
	    ch_name : str
	        Chapter of the notebook the link is being inserted into.

	    Returns
	    -------
	    str
	        ``[text](#<anchor>)`` when the link targets an anchor in that
	        same notebook; otherwise ``replace_old`` unchanged (this includes
	        links without an anchor and strings that are not of the expected
	        form).
	    """
	    text, marker, target = replace_old.partition("(..")
	    # target looks like "/<chapter>/<notebook>#<anchor>)"; split at most
	    # twice so a "/" inside the anchor is left alone.
	    parts = target.split("/", 2)
	    if not marker or len(parts) < 3:
	        return replace_old
	    ch_name_link = parts[1]
	    # Cut at the FIRST "#": everything after it is the anchor, even if the
	    # anchor itself contains "#".
	    subsec_name_link, hash_mark, anchor = parts[2].partition("#")
	    if hash_mark and subsec_name == subsec_name_link and ch_name == ch_name_link:
	        return text + "(#" + anchor
	    return replace_old


	def get_citations():
	    """
	    Format 'd2l.bib' and rebuild the references notebook from it.

	    Runs ``pybtex-format`` to write the formatted bibliography to the
	    temporary file 'out.txt', wraps each formatted line in a ``<p>``
	    element whose id is the BibTeX key, and writes the result as the only
	    cell of 'chapter_references/zreferences.ipynb'. The temporary file is
	    removed afterwards.

	    Returns
	    -------
	    cite_table : dict
	        Maps each BibTeX key to a markdown link whose text is the
	        formatted label (the part of the line up to the first "]") and
	        whose target is that key's anchor in the references notebook.

	    Raises
	    ------
	    SystemExit
	        With code -1 when ``pybtex-format`` fails; its stderr is logged.
	    """
	    # Generate temporary file out.txt from d2l.bib
	    # containing formatted references output.
	    # The arguments are passed as a list so no shell is involved.
	    cmd = ["pybtex-format", "--label-style", "apa", "--abbreviate-names",
	           "d2l.bib", "out.txt"]
	    process = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
	    if process.returncode != 0:
	        logging.error('%s', process.stderr.decode())
	        # Same effect as exit(-1), without relying on the `site` builtin.
	        raise SystemExit(-1)

	    with open("out.txt", "r") as ref_file:
	        new_lines = ref_file.readlines()

	    with open('d2l.bib', 'r') as bibtex_file:
	        d2l_bib_db = bibtexparser.load(bibtex_file)

	    # Create cite_table
	    # NOTE(review): lines and entries are paired by position, which assumes
	    # pybtex-format emits one line per entry in the same order as
	    # bibtexparser returns them -- confirm for the chosen style.
	    cite_table = {}
	    for i, (line, entry) in enumerate(zip(new_lines, d2l_bib_db.entries)):
	        key = entry["ID"]
	        cite_name = line.split("]")[0] + "]"
	        link = f"(../chapter_references/zreferences.ipynb#{key})"
	        cite_table[key] = cite_name + link
	        new_lines[i] = f"<p id={key}>{line}</p>"

	    print("Citations Table Created!")

	    os.remove("out.txt")
	    print("Citations temporary file removed!")

	    # Replace the content of the references notebook with a single
	    # markdown cell holding the anchored bibliography.
	    sec_references_path = os.path.join(os.getcwd(), "chapter_references", "zreferences.ipynb")
	    nb = notebook.read(sec_references_path)
	    new_cells = [nbformat.v4.new_markdown_cell('\n'.join(new_lines))]
	    new_nb = notebook.create_new_notebook(nb, new_cells)
	    with open(sec_references_path, 'w') as f:
	        f.write(nbformat.writes(new_nb))

	    return cite_table


	def replace_references(fig_table, eq_table, sec_table, head_table, table_table, cite_table,
	                       ordered_chapters=None):
	    """
	    Replace ``:numref:``, ``:ref:``, ``:eqref:`` and ``:cite:`` marks.

	    Every notebook in ``ordered_chapters`` is read, the marks in its
	    markdown cells are replaced by the links stored in the given tables
	    (shortened to in-page links where the target is in the same notebook)
	    and the notebook is written back to the same path. Marks whose label
	    or citation key is not in any table are left untouched; unknown
	    citation keys are additionally reported with a warning.

	    Parameters
	    ----------
	    fig_table, eq_table, table_table : dict
	        Label (backticks included) -> markdown link.
	    sec_table, head_table : dict
	        Label (backticks included) -> ``(numbered_link, titled_link)``;
	        ``:numref:`` uses the first, ``:ref:`` the second.
	    cite_table : dict
	        BibTeX key (no backticks) -> markdown link.
	    ordered_chapters : dict, optional
	        Chapter name -> ``(chapter_number, [(section_number,
	        notebook_name), ...])``. Defaults to the module-level
	        ``ordered_ch_dict_with_num``.
	    """
	    md_mark_pattern_ref = re.compile(':ref:(`[\ \*-\/\\\._\w]+`)?')
	    md_mark_pattern_numref = re.compile(':numref:(`[\ \*-\/\\\._\w]+`)?')
	    md_mark_pattern_eqref = re.compile(':eqref:(`[\ \*-\/\\\._\w]+`)?')
	    md_mark_pattern_cite = re.compile(':cite:(`[\ \*-\/\\\._\w]+`)?')

	    if ordered_chapters is None:
	        ordered_chapters = ordered_ch_dict_with_num

	    for ch_name, value in ordered_chapters.items():
	        for _, subsec_name in value[1]:
	            sec_path = os.path.join(os.getcwd(), ch_name, subsec_name)
	            new_cells = []
	            nb = notebook.read(sec_path)
	            for cell in nb.cells:
	                if cell.cell_type != 'markdown':
	                    new_cells.append(cell)
	                    continue
	                lines = cell.source.split('\n')
	                for j, line in enumerate(lines):
	                    # All marks are collected from the original line; the
	                    # replacements are applied to lines[j].
	                    m_all_numref = md_mark_pattern_numref.findall(line)
	                    m_all_ref = md_mark_pattern_ref.findall(line)
	                    m_all_eqref = md_mark_pattern_eqref.findall(line)
	                    m_all_cite = md_mark_pattern_cite.findall(line)

	                    for m in m_all_numref:
	                        if m in fig_table:
	                            replacement = shorten_replacement(fig_table[m], subsec_name, ch_name)
	                            lines[j] = lines[j].replace(":numref:" + m, replacement)
	                        if m in sec_table:
	                            replacement = shorten_replacement(sec_table[m][0], subsec_name, ch_name)
	                            lines[j] = lines[j].replace(":numref:" + m, replacement)
	                        if m in head_table:
	                            replacement = shorten_replacement(head_table[m][0], subsec_name, ch_name)
	                            lines[j] = lines[j].replace(":numref:" + m, replacement)
	                        if m in table_table:
	                            replacement = shorten_replacement(table_table[m], subsec_name, ch_name)
	                            lines[j] = lines[j].replace(":numref:" + m, replacement)

	                    for m in m_all_eqref:
	                        if m in eq_table:
	                            replacement = shorten_replacement(eq_table[m], subsec_name, ch_name)
	                            lines[j] = lines[j].replace(":eqref:" + m, replacement)

	                    for m in m_all_ref:
	                        if m in sec_table:
	                            replacement = shorten_replacement(sec_table[m][1], subsec_name, ch_name)
	                            lines[j] = lines[j].replace(":ref:" + m, replacement)
	                        if m in head_table:
	                            replacement = shorten_replacement(head_table[m][1], subsec_name, ch_name)
	                            lines[j] = lines[j].replace(":ref:" + m, replacement)

	                    for m in m_all_cite:
	                        if not m:
	                            # A bare ":cite:" without an argument.
	                            continue
	                        # One mark may cite several comma-separated keys.
	                        cited_keys = [key.strip() for key in m.strip("`").split(",")]
	                        unknown = [key for key in cited_keys if key not in cite_table]
	                        if unknown:
	                            logging.warning("%s: unknown citation key(s) %s",
	                                            sec_path, ", ".join(unknown))
	                            continue
	                        replacement = ','.join(cite_table[key] for key in cited_keys)
	                        lines[j] = lines[j].replace(":cite:" + m, replacement)
	                new_cells.append(nbformat.v4.new_markdown_cell('\n'.join(lines)))
	            new_nb = notebook.create_new_notebook(nb, new_cells)
	            with open(sec_path, 'w') as f:
	                f.write(nbformat.writes(new_nb))


	# First pass: number every element in the notebooks and collect the
	# label -> link lookup tables.
	(fig_table, eq_table, sec_table,
	 head_table, table_table) = generate_tables_and_replace_unlabeled(ordered_ch_dict_with_num)

	# Rebuild the references notebook and collect the key -> link table.
	cite_table = get_citations()

	# Second pass: substitute the reference and citation marks with links.
	replace_references(
	    fig_table, eq_table, sec_table, head_table, table_table, cite_table
	)

	print("Done!")