Skip to content

Instantly share code, notes, and snippets.

@minrk
Last active April 2, 2016 20:07
Show Gist options
  • Save minrk/3013fe19112055523172 to your computer and use it in GitHub Desktop.
Save minrk/3013fe19112055523172 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""
nb-offline: run notebooks offline
Usage:
nb-offline my-notebook.ipynb
Will save my-notebook-intermediate.ipynb every time there's an output, and save
my-notebook-done.ipynb when it's all done.
This is basically `nbconvert --execute`, but it saves an `-intermediate` file on every output.
It's a bit of a mess, because I made it in a hurry for @ctitusbrown.
In particular, this shouldn't need as much copy/paste from ExecutePreprocessor as it does.
"""
# Copyright (c) Min RK.
# Distributed under the terms of the Modified BSD License.
import io
import logging
import os
import sys
import time
from queue import Empty
import nbformat
from nbformat.v4 import output_from_msg
from nbconvert.exporters import NotebookExporter
from nbconvert.preprocessors import ExecutePreprocessor
from traitlets import Unicode
class OfflinePreprocessor(ExecutePreprocessor):
    """Like ExecutePreprocessor, but saves the notebook on every output.

    The notebook being executed is written to ``intermediate_path`` each time
    a cell receives an execution count or a new output, so that progress can
    be viewed while the run is still going.
    """

    # Path that save() writes the in-progress notebook to.
    intermediate_path = Unicode()

    def preprocess(self, nb, resources):
        """Clear all code-cell outputs, then execute the notebook.

        Keeps a reference to ``nb`` on ``self.notebook`` so that save() can
        write it out while cells are still running, and returns whatever the
        parent class's ``preprocess`` returns.
        """
        self.notebook = nb
        # Goes through the logger rather than print() so stdout stays clean.
        self.log.debug("resources: %s", resources)
        # NOTE(review): not read anywhere in this class; looks like a leftover.
        self._original_get_iopub = None
        # Drop stale outputs up front so intermediate saves only ever contain
        # results produced by this run.
        for cell in nb['cells']:
            if cell['cell_type'] == 'code':
                cell['outputs'] = []
        return super(OfflinePreprocessor, self).preprocess(nb, resources)

    def run_cell(self, cell):
        """Execute one cell, saving the notebook after every output.

        Returns the list of outputs collected for the cell (the same list
        that is assigned to ``cell.outputs``).

        Raises TimeoutError if no IOPub message arrives before the deadline,
        or if the execute reply does not arrive in time and
        ``interrupt_on_timeout`` is false.
        """
        msg_id = self.kc.execute(cell.source)
        self.log.debug("Executing cell:\n%s", cell.source)

        timeout = self.timeout
        start = time.time()
        # A negative or None timeout means "wait forever". Comparing None
        # with an int raises TypeError on Python 3, so test for it first.
        if timeout is None or timeout < 0:
            timeout = 3e9  # 100 years ~= forever
        deadline = start + timeout

        outs = cell.outputs = []

        # get output as it comes, and save on every
        while True:
            # Time left until the per-cell deadline, never negative.
            timeout = max(deadline - time.time(), 0)
            try:
                msg = self.kc.iopub_channel.get_msg(timeout=timeout)
            except Empty:
                self.log.error("Timeout waiting for IOPub output. You may want to set --timeout to a larger value")
                raise TimeoutError("Cell execution timed out, see log for details.") from None

            if msg['parent_header'].get('msg_id') != msg_id:
                # not an output from our execution
                continue

            msg_type = msg['msg_type']
            self.log.debug("output: %s", msg_type)
            content = msg['content']

            # set the prompt number for the input and the output
            if 'execution_count' in content:
                cell['execution_count'] = content['execution_count']
                self.save()

            if msg_type == 'status':
                # The idle status for our request ends output collection.
                if content['execution_state'] == 'idle':
                    break
                continue
            elif msg_type == 'execute_input':
                continue
            elif msg_type == 'clear_output':
                # Start a fresh list and rebind both names to it.
                cell.outputs = outs = []
                continue
            elif msg_type.startswith('comm'):
                continue

            try:
                out = output_from_msg(msg)
            except ValueError:
                self.log.error("unhandled iopub msg: %s", msg_type)
            else:
                outs.append(out)
                self.save()

        # wait for finish, with timeout
        while True:
            timeout = max(deadline - time.time(), 0)
            try:
                msg = self.kc.shell_channel.get_msg(timeout=timeout)
            except Empty:
                self.log.warning("Timeout waiting for Execute result. You may want to set --timeout to a larger value")
                if self.interrupt_on_timeout:
                    self.log.error("Interrupting kernel")
                    self.km.interrupt_kernel()
                    break
                raise TimeoutError("Cell execution timed out, see log"
                                   " for details.") from None

            if msg['parent_header'].get('msg_id') == msg_id:
                break
            # not our reply; keep waiting

        return outs

    def save(self):
        """Write the current state of the notebook to ``intermediate_path``."""
        self.log.info("Saving %s", self.intermediate_path)
        nbformat.write(self.notebook, self.intermediate_path)
def offline_run(from_path, timeout=-1):
    """Execute the notebook at ``from_path`` and write the result next to it.

    For ``name.ext`` the in-progress notebook goes to ``name-intermediate.ext``
    and the finished notebook to ``name-done.ext``.

    ``timeout`` applies to each cell, not to the notebook as a whole.
    """
    stem, suffix = os.path.splitext(from_path)
    intermediate_path = ''.join((stem, '-intermediate', suffix))
    to_path = ''.join((stem, '-done', suffix))

    exporter = NotebookExporter()
    runner = OfflinePreprocessor(
        timeout=timeout,
        intermediate_path=intermediate_path,
    )
    exporter.register_preprocessor(runner, enabled=True)
    exporter.preprocessors.append(runner)

    # from_file runs the registered preprocessors and returns the exported
    # notebook text together with a resources value, which is not used here.
    executed_json, _resources = exporter.from_file(from_path)

    with io.open(to_path, mode='w', encoding='utf8') as done_file:
        done_file.write(executed_json)
if __name__ == '__main__':
    # Command-line entry point: parse arguments, set up logging, run.
    import argparse

    cli = argparse.ArgumentParser(description=__doc__)
    # (positional/flag names, keyword options) for each argument, in order.
    argument_specs = (
        (('notebook',), dict(type=str, help="Notebook file to run offline")),
        (('--debug',), dict(action='store_true', help="Debug-level logging")),
        (('--timeout',), dict(type=int, default=-1, help="Timeout (in seconds) to wait for each cell's execution to complete.")),
    )
    for names, options in argument_specs:
        cli.add_argument(*names, **options)
    args = cli.parse_args()

    if args.debug:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    logging.basicConfig(
        format="[%(levelname)s %(asctime)s] %(message)s",
        level=log_level,
    )

    offline_run(args.notebook, args.timeout)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment