Last active
April 2, 2016 20:07
-
-
Save minrk/3013fe19112055523172 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
nb-offline: run notebooks offline | |
Usage: | |
nb-offline my-notebook.ipynb | |
Will save my-notebook-intermediate.ipynb every time there's an output, and save | |
my-notebook-done.ipynb when it's all done. | |
This is basically `nbconvert --execute`, but it saves an `-intermediate` file on every output. | |
It's a bit of a mess, because I made it in a hurry for @ctitusbrown. | |
In particular, this shouldn't need as much copy/paste from ExecutePreprocessor as it does. | |
""" | |
# Copyright (c) Min RK. | |
# Distributed under the terms of the Modified BSD License. | |
import io | |
import logging | |
import os | |
import sys | |
import time | |
from queue import Empty | |
import nbformat | |
from nbformat.v4 import output_from_msg | |
from nbconvert.exporters import NotebookExporter | |
from nbconvert.preprocessors import ExecutePreprocessor | |
from traitlets import Unicode | |
class OfflinePreprocessor(ExecutePreprocessor):
    """Execute a notebook, saving an intermediate copy as outputs arrive.

    Works like ``ExecutePreprocessor``, except that the in-progress notebook
    is written to ``intermediate_path`` every time a cell receives an
    execution count or a new output, so partial results can be inspected
    while the run is still going.
    """

    # Destination file for the in-progress notebook written by ``save()``.
    intermediate_path = Unicode()

    def preprocess(self, nb, resources):
        """Clear the outputs of every code cell, then execute the notebook.

        A reference to ``nb`` is kept on ``self.notebook`` so that ``save()``
        can write the partially-executed notebook from inside ``run_cell``.

        Returns whatever the parent class's ``preprocess`` returns.
        """
        self.notebook = nb
        # Log instead of printing, so stdout stays clean.
        self.log.debug("resources: %s", resources)
        # Start from a clean slate so stale outputs never end up in the
        # intermediate file.
        for cell in nb['cells']:
            if cell['cell_type'] == 'code':
                cell['outputs'] = []
        return super(OfflinePreprocessor, self).preprocess(nb, resources)

    def run_cell(self, cell):
        """Execute one cell, saving the notebook as each output arrives.

        ``self.timeout`` is applied as a single deadline covering both the
        IOPub output loop and the wait for the shell reply. A timeout that
        is ``None`` or negative means "wait (practically) forever".

        Returns the list of outputs collected for the cell.

        Raises ``TimeoutError`` when no IOPub message arrives before the
        deadline, or when the shell reply does not arrive in time and
        ``self.interrupt_on_timeout`` is false.
        """
        msg_id = self.kc.execute(cell.source)
        self.log.debug("Executing cell:\n%s", cell.source)

        timeout = self.timeout
        # Guard None explicitly: ``None < 0`` raises TypeError on Python 3.
        if timeout is None or timeout < 0:
            timeout = 3e9  # 100 years ~= forever
        deadline = time.time() + timeout

        outs = cell.outputs = []

        # get output as it comes, and save on every one
        while True:
            remaining = max(deadline - time.time(), 0)
            try:
                msg = self.kc.iopub_channel.get_msg(timeout=remaining)
            except Empty:
                self.log.error("Timeout waiting for IOPub output. You may want to set --timeout to a larger value")
                raise TimeoutError("Cell execution timed out, see log for details.") from None

            if msg['parent_header'].get('msg_id') != msg_id:
                # not an output from our execution
                continue

            msg_type = msg['msg_type']
            self.log.debug("output: %s", msg_type)
            content = msg['content']

            # set the prompt number for the input and the output
            if 'execution_count' in content:
                cell['execution_count'] = content['execution_count']
                self.save()

            if msg_type == 'status':
                if content['execution_state'] == 'idle':
                    # The kernel reported idle for our request: no more output.
                    break
                continue
            elif msg_type == 'execute_input':
                continue
            elif msg_type == 'clear_output':
                # Rebind both names so later outputs land in the fresh list.
                cell.outputs = outs = []
                continue
            elif msg_type.startswith('comm'):
                continue

            try:
                out = output_from_msg(msg)
            except ValueError:
                self.log.error("unhandled iopub msg: %s", msg_type)
            else:
                outs.append(out)
                self.save()

        # wait for finish, with timeout
        while True:
            remaining = max(deadline - time.time(), 0)
            try:
                msg = self.kc.shell_channel.get_msg(timeout=remaining)
            except Empty:
                self.log.warning("Timeout waiting for Execute result. You may want to set --timeout to a larger value")
                if self.interrupt_on_timeout:
                    self.log.error("Interrupting kernel")
                    self.km.interrupt_kernel()
                    break
                raise TimeoutError("Cell execution timed out, see log"
                                   " for details.") from None

            if msg['parent_header'].get('msg_id') == msg_id:
                break
            # not our reply; keep waiting

        return outs

    def save(self):
        """Write the current state of ``self.notebook`` to ``intermediate_path``."""
        self.log.info("Saving %s", self.intermediate_path)
        nbformat.write(self.notebook, self.intermediate_path)
def offline_run(from_path, timeout=-1):
    """Run a notebook offline.

    For an input ``name.ext`` the in-progress notebook is saved to
    ``name-intermediate.ext`` while it executes, and the finished notebook
    is written to ``name-done.ext``.

    ``timeout`` is per cell and is passed straight to the preprocessor;
    the default of -1 is treated there as "no timeout".
    """
    base_name, ext = os.path.splitext(from_path)
    intermediate_path = base_name + '-intermediate' + ext
    to_path = base_name + '-done' + ext

    exporter = NotebookExporter()
    preprocessor = OfflinePreprocessor(
        intermediate_path=intermediate_path,
        timeout=timeout,
    )
    # Register the instance once through the exporter's API. The original
    # also appended it to ``exporter.preprocessors``, which is redundant with
    # this call.
    exporter.register_preprocessor(preprocessor, enabled=True)

    nb_json, _ = exporter.from_file(from_path)
    with io.open(to_path, 'w', encoding='utf8') as f:
        f.write(nb_json)
if __name__ == '__main__':
    # Command-line entry point: parse the arguments, set up logging, then
    # run the requested notebook.
    import argparse

    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument(
        'notebook',
        type=str,
        help="Notebook file to run offline",
    )
    cli.add_argument(
        '--debug',
        action='store_true',
        help="Debug-level logging",
    )
    cli.add_argument(
        '--timeout',
        type=int,
        default=-1,
        help="Timeout (in seconds) to wait for each cell's execution to complete.",
    )
    args = cli.parse_args()

    # INFO unless --debug was given.
    if args.debug:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    logging.basicConfig(
        level=log_level,
        format="[%(levelname)s %(asctime)s] %(message)s",
    )

    offline_run(args.notebook, args.timeout)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment