|
import json |
|
import os |
|
import sys |
|
import deepl |
|
import readline |
|
|
|
# Read the DeepL API key from a local file. The context manager closes the
# handle immediately instead of leaving it open until garbage collection.
with open("./my_key.txt", "r") as key_file:
    my_key = key_file.read().strip()

# Module-level DeepL client built from that key.
translator = deepl.Translator(my_key)
|
|
|
|
|
def needs_translate(cell):
    """Decide whether a notebook cell should be sent for translation.

    A cell is skipped when it is a table-of-contents cell, a code cell, or
    an HTML cell (as reported by the matching ``is_*_cell`` predicates);
    every other cell qualifies.

    Args:
        cell: Notebook cell dict.

    Returns:
        True if the cell should be translated, False otherwise.
    """
    # Checked in this order; evaluation stops at the first predicate that hits.
    skip_checks = (is_toc_cell, is_code_cell, is_html_cell)
    return not any(check(cell) for check in skip_checks)
|
|
|
|
|
def get_cell_text(cell):
    """Return the full text of a notebook cell as a single string.

    The notebook format allows ``source`` to be either one string or a list
    of lines that already carry their own trailing newline, so the pieces
    are concatenated without adding a separator. Joining with ``"\\n"``
    would double every line break (and would split a plain string source
    into one character per line).

    Args:
        cell: Notebook cell dict with a ``"source"`` entry.

    Returns:
        The cell text; an empty string for an empty source list.
    """
    source = cell["source"]
    if isinstance(source, str):
        return source
    return "".join(source)
|
|
|
|
|
def is_latex_cell(cell):
    """Tell whether the cell's text begins with a ``$`` character.

    Args:
        cell: Notebook cell dict.

    Returns:
        True if the text from ``get_cell_text`` starts with ``$``.
    """
    return get_cell_text(cell).startswith("$")
|
|
|
|
|
def is_html_cell(cell):
    """Tell whether the cell's text begins with a ``<`` character.

    Args:
        cell: Notebook cell dict.

    Returns:
        True if the text from ``get_cell_text`` starts with ``<``.
    """
    return get_cell_text(cell).startswith("<")
|
|
|
|
|
def is_toc_cell(cell):
    """Look up the ``toc`` entry in the cell's metadata.

    Args:
        cell: Notebook cell dict with a ``"metadata"`` mapping.

    Returns:
        The value stored under ``"toc"`` in the metadata, or None when the
        key is absent. Callers use the result for its truthiness.
    """
    metadata = cell["metadata"]
    return metadata.get("toc")
|
|
|
|
|
def is_code_cell(cell):
    """Tell whether the cell's ``cell_type`` is ``"code"``.

    Args:
        cell: Notebook cell dict with a ``"cell_type"`` entry.

    Returns:
        True for code cells, False for any other cell type.
    """
    kind = cell["cell_type"]
    return kind == "code"
|
|
|
|
|
def deepl_translate(text, from_lang, to_lang):
    """Translate ``text`` using the module-level DeepL ``translator``.

    Args:
        text: Text to translate.
        from_lang: Source language code, forwarded as ``source_lang``.
        to_lang: Target language code, forwarded as ``target_lang``.

    Returns:
        The translated text. Empty or whitespace-only input is returned
        unchanged without contacting the API.
    """
    # Nothing to translate: skip the network round-trip entirely.
    # NOTE(review): some deepl client releases are believed to reject empty
    # text with ValueError -- confirm against the installed version.
    if not text.strip():
        return text

    result = translator.translate_text(
        text, source_lang=from_lang, target_lang=to_lang
    )
    return result.text
|
|
|
|
|
def generate_translated_notebook(source_lang, target_lang, filename):
    """Write a copy of a notebook with translated cells added.

    The notebook at ``filename`` is loaded as JSON. Every original cell is
    kept, and each cell accepted by ``needs_translate`` is followed by a
    copy whose ``source`` holds the translation returned by
    ``deepl_translate``. The result is written beside the input as
    ``<name>_<target_lang>.ipynb``.

    Args:
        source_lang: Source language code passed to ``deepl_translate``.
        target_lang: Target language code passed to ``deepl_translate``;
            also used as the output file name suffix.
        filename: Path of the notebook to read.

    Returns:
        None. Progress and the output path are printed to stdout.
    """
    with open(filename, "rb") as f:
        notebook = json.load(f)

    cells = notebook["cells"]

    # Shallow-clone the notebook so its top-level metadata is kept, then
    # rebuild the cell list from scratch.
    new_notebook = notebook.copy()
    new_notebook["cells"] = []
    n_translations = 0

    for i, original_cell in enumerate(cells):
        if i % 10 == 0:
            print(f"Evaluating cell #{i}")

        # The original cell is always kept; its translation (if any) is
        # appended right after it.
        new_notebook["cells"].append(original_cell)

        if not needs_translate(original_cell):
            continue

        n_translations += 1
        new_cell = original_cell.copy()
        original_text = get_cell_text(original_cell)
        new_cell["source"] = deepl_translate(original_text, source_lang, target_lang)
        new_notebook["cells"].append(new_cell)

    # Report against the real cell count: the loop index is one short of it
    # and is undefined when the notebook has no cells.
    print(f"Translated {n_translations}/{len(cells)} cells")

    # Drop only a trailing ".ipynb"; splitting on the first occurrence would
    # truncate paths such as ".ipynb_checkpoints/name.ipynb".
    suffix = ".ipynb"
    base = filename[: -len(suffix)] if filename.endswith(suffix) else filename
    filepath_translated = f"{base}_{target_lang}.ipynb"

    # Write UTF-8 with non-ASCII characters kept literal so translated text
    # stays readable in the file and does not depend on the locale encoding.
    with open(filepath_translated, "w", encoding="utf-8") as f:
        json.dump(new_notebook, f, indent=2, ensure_ascii=False)

    # Announce the path only once the file has actually been written.
    print(f"You can find translated notebook here: {filepath_translated}")
|
|
|
|
|
if __name__ == "__main__":
    # Restrict completion word breaks to whitespace and "=" -- presumably so
    # that path characters such as "/" or "-" do not interrupt tab
    # completion of the notebook path.
    readline.set_completer_delims(' \t\n=')
    readline.parse_and_bind("tab: complete")

    # Strip surrounding whitespace: tab completion can leave a trailing
    # space, which would make the existence check below fail.
    filename = input("File path: ").strip()

    print(f"You entered: {filename}")
    if not os.path.exists(filename):
        print("That file path does not exist")
        # Exit with a non-zero status so the failure is visible to callers.
        sys.exit(1)

    source_lang = input("Source language: ").strip()
    target_lang = input("Target language: ").strip()

    generate_translated_notebook(source_lang, target_lang, filename)