# Source code for ipypublish.convert.main

#!/usr/bin/env python
# import base64
from typing import List, Tuple, Union, Dict  # noqa: F401
import io
import logging
import os
import re
import shutil
import time
from traitlets.config import Config
from jsonextended import edict
from six import string_types


import ipypublish
from ipypublish.utils import pathlib
from ipypublish.scripts.nbmerge import merge_notebooks
from ipypublish.convert.config_manager import (get_export_config_path,
                                               load_export_config,
                                               load_template,
                                               create_exporter_cls)
from ipypublish.scripts.pdfexport import export_pdf


def handle_error(msg, err_type, raise_msg=None, log_msg=None):
    """Log an error message, then raise an exception.

    Parameters
    ----------
    msg : str
        default text used for both the log record and the exception
    err_type : type
        exception class to raise; it is called with the message as its
        single argument
    raise_msg : str or None
        text for the raised exception (falls back to ``msg`` when None)
    log_msg : str or None
        text for the log record (falls back to ``msg`` when None)

    Raises
    ------
    err_type
        always, after the message has been logged at ERROR level

    """
    if raise_msg is None:
        raise_msg = msg
    if log_msg is None:
        log_msg = msg

    # log first, so the record is emitted even if the caller swallows
    # the exception
    logging.error(log_msg)
    raise err_type(raise_msg)


def publish(ipynb_path,
            conversion='latex_ipypublish_main',
            outpath=None,
            dump_files=False,
            ignore_prefix='_',
            clear_existing=False,
            create_pdf=False,
            pdf_in_temp=False,
            pdf_debug=False,
            plugin_folder_paths=(),
            dry_run=False):
    """ convert one or more Jupyter notebooks to a published format

    paths can be string of an existing file or folder,
    or a pathlib.Path like object

    all files linked in the documents are placed into a single folder

    Parameters
    ----------
    ipynb_path
        notebook file or directory
    conversion: str or pathlib.Path
        conversion configuration to use; if it is a path that exists it is
        used directly, otherwise it is looked up with
        ``get_export_config_path``
    outpath : str or pathlib.Path
        path to output converted files
        (defaults to a 'converted' folder in the current working directory)
    dump_files: bool
        whether to write files from nbconvert (images, etc) to outpath
        (this is always done when create_pdf is True)
    ignore_prefix: str
        ignore ipynb files with this prefix
    clear_existing : bool
        whether to clear existing external files in outpath folder
    create_pdf: bool
        whether to convert to pdf (if converting to latex)
    pdf_in_temp: bool
        whether to run pdf conversion in a temporary folder
    pdf_debug: bool
        if True, run latexmk in interactive mode
    plugin_folder_paths: tuple
        passed on to ``get_export_config_path`` when ``conversion`` is not
        an existing path
        (presumably extra folders to search for configurations - confirm
        against that function)
    dry_run: bool
        if True, do not create any files

    Returns
    --------
    outpath: str
        path to output file
        (for a dry run, the ``outpath`` argument is returned unchanged)
    exporter: nbconvert.exporters.Exporter
        the exporter used

    Raises
    ------
    IOError
        if the conversion configuration could not be found
    RuntimeError
        if the pdf export reports failure

    """
    # setup the input and output paths
    if isinstance(ipynb_path, string_types):
        ipynb_path = pathlib.Path(ipynb_path)
    ipynb_name = os.path.splitext(ipynb_path.name)[0]
    files_folder = ipynb_name + '_files'
    outdir = os.path.join(
        os.getcwd(), 'converted') if outpath is None else outpath

    if not dry_run and not os.path.exists(outdir):
        # makedirs (rather than mkdir), so that nested output paths
        # whose parents do not exist yet can also be used
        os.makedirs(outdir)

    # log start of conversion
    logging.info('started ipypublish v{0} at {1}'.format(
        ipypublish.__version__, time.strftime("%c")))
    logging.info('logging to: {}'.format(
        os.path.join(outdir, ipynb_name + '.nbpub.log')))
    logging.info('running for ipynb(s) at: {0}'.format(ipynb_path))
    logging.info('with conversion configuration: {0}'.format(conversion))

    # merge all notebooks (this handles checking ipynb_path exists)
    final_nb, meta_path = merge_notebooks(ipynb_path,
                                          ignore_prefix=ignore_prefix)
    logging.debug('notebooks meta path: {}'.format(meta_path))

    # find conversion configuration
    logging.info('finding conversion configuration: {}'.format(conversion))
    if isinstance(conversion, string_types):
        outformat_path = pathlib.Path(conversion)
    else:
        outformat_path = conversion
    if outformat_path.exists():  # TODO use pathlib approach
        # if conversion is a path that exists, use that
        export_config_path = outformat_path
    else:
        # else search internally
        export_config_path = get_export_config_path(
            conversion, plugin_folder_paths)

    if export_config_path is None:
        handle_error(
            "could not find conversion configuration: {}".format(conversion),
            IOError)

    # read conversion configuration and create
    logging.info('loading conversion configuration')
    data = load_export_config(export_config_path)
    logging.info('creating exporter')
    exporter_cls = create_exporter_cls(data["exporter"]["class"])
    logging.info('creating template')
    jinja_template = load_template(data["template"])
    logging.info('creating nbconvert configuration')
    config = create_config(data["exporter"],
                           {"${meta_path}": str(meta_path),
                            "${files_path}": str(files_folder)})

    # run nbconvert
    logging.info('running nbconvert')
    exporter, body, resources = export_notebook(final_nb, exporter_cls,
                                                config, jinja_template)

    # postprocess results
    body, resources, internal_files = postprocess_nb(body, resources)

    if dry_run:
        # nothing is written, so hand back the caller's outpath as given
        return outpath, exporter

    # write results
    logging.info("writing results")
    main_file_name = ipynb_name + exporter.file_extension
    # the external files are also required when building a pdf
    outpath, outfilespath = write_output(body, resources, outdir,
                                         main_file_name,
                                         dump_files or create_pdf,
                                         files_folder, internal_files,
                                         clear_existing)

    # create pdf
    if create_pdf and exporter.output_mimetype == 'text/latex':
        logging.info('running pdf conversion')

        if not export_pdf(outpath, outdir=outdir,
                          files_path=outfilespath,
                          convert_in_temp=pdf_in_temp,
                          html_viewer=True,
                          debug_mode=pdf_debug):
            handle_error('pdf export failed, try running with pdf_debug=True',
                         RuntimeError)

    logging.info('process finished successfully')

    return outpath, exporter


def _apply_replacements(val, replacements):
    """substitute every placeholder key of ``replacements`` found in ``val``

    non-string values are returned untouched
    """
    if isinstance(val, string_types):
        for instr, outstr in replacements.items():
            val = val.replace(instr, outstr)
    return val


def create_config(exporter_data, replacements):
    # type: (dict, Dict[str, str]) -> Config
    """create the nbconvert configuration for an exporter

    Parameters
    ----------
    exporter_data: dict
        the exporter section of a conversion configuration; the "class" key
        is required, and the "filters", "preprocessors" and "other_args"
        keys are read if present
    replacements: dict
        mapping of placeholder string to replacement string,
        applied to all string valued arguments

    Returns
    -------
    config: traitlets.config.Config

    """
    config = {}
    # configuration keys are prefixed by the bare class name
    exporter_name = exporter_data["class"].split(".")[-1]
    config[exporter_name + ".filters"] = exporter_data.get("filters", [])

    preprocessors = []
    for preproc in exporter_data.get("preprocessors", []):
        preprocessors.append(preproc["class"])
        preproc_name = preproc["class"].split(".")[-1]
        for name, val in preproc.get("args", {}).items():
            config[preproc_name + "." + name] = _apply_replacements(
                val, replacements)
    config[exporter_name + ".preprocessors"] = preprocessors

    for name, val in exporter_data.get("other_args", {}).items():
        config[name] = _apply_replacements(val, replacements)

    # ensure file paths point towards the right folder
    files_path = _apply_replacements("${files_path}", replacements)
    config[
        'ExtractOutputPreprocessor.output_filename_template'
    ] = files_path + '/{unique_key}_{cell_index}_{index}{extension}'

    return dict_to_config(config, True)


def dict_to_config(config, unflatten=True):
    """convert a dictionary to a traitlets Config object

    Parameters
    ----------
    config: dict
    unflatten: bool
        if True, first expand keys delimited by "." into nested
        dictionaries (using ``edict.unflatten``)

    Returns
    -------
    config: traitlets.config.Config

    """
    if unflatten:
        config = edict.unflatten(config, key_as_tuple=False, delim=".")
    return Config(config)


def export_notebook(final_nb, exporter_cls, config, jinja_template):
    """instantiate the exporter and run it on the notebook

    Parameters
    ----------
    final_nb
        the notebook, passed to the exporter's ``from_notebook_node``
    exporter_cls
        callable accepting ``config`` and ``extra_loaders`` keyword
        arguments and returning the exporter
    config
        configuration handed to the exporter
    jinja_template
        additional template loader for the exporter,
        or None to supply no extra loaders

    Returns
    -------
    exporter
        the exporter instance that was created
    body
        first item returned by ``from_notebook_node``
    resources
        second item returned by ``from_notebook_node``

    """
    extra_loaders = [] if jinja_template is None else [jinja_template]
    exporter = exporter_cls(config=config, extra_loaders=extra_loaders)
    body, resources = exporter.from_notebook_node(final_nb)
    return exporter, body, resources


def postprocess_nb(body, resources):
    """tidy the converted document body and filter its output files

    Parameters
    ----------
    body: str
        the converted document
    resources: dict
        the "refslide" and "outputs" keys are read if present;
        entries of "outputs" whose path does not appear in the body
        are removed in place

    Returns
    -------
    body: str
        the processed document
    resources: dict
        the same object that was passed in
    internal_files: dict
        the filtered "outputs" mapping, or an empty dict if there are none

    """
    # TODO could this be written as nbconvert component?

    # reduce multiple blank lines to single
    body = re.sub(r'\n\s*\n', '\n\n', body)
    # make sure references refer to correct slides
    if 'refslide' in resources:
        for k, (col, row) in resources['refslide'].items():
            # NB: the doubled braces format to the literal
            # text {id_home_prefix}
            body = body.replace('{{id_home_prefix}}{0}'.format(
                k), '#/{0}/{1}{2}'.format(col, row, k))

    # filter internal files by those that are referenced in the document body
    # (.get, so that a plain dict without an "outputs" key is also accepted)
    outputs = resources.get('outputs')
    if not outputs:
        return body, resources, {}

    # iterate over a copy of the keys, since entries are removed in the loop
    for path in list(outputs):
        if path not in body:
            outputs.pop(path)

    return body, resources, outputs


def write_output(body, resources, outdir, main_file_name, output_external,
                 files_folder, internal_files, clear_existing):
    """write the converted document, and optionally its linked files

    Parameters
    ----------
    body: str
        content of the main file (written with utf8 encoding)
    resources: dict
        the "external_file_paths" key is read if present;
        each of its files is copied into the files folder
    outdir: str
        existing folder to write into
    main_file_name: str
        name of the main file, relative to outdir
    output_external: bool
        whether to write the internal and external files
    files_folder: str
        name of the files folder, relative to outdir
    internal_files: dict
        mapping of path (relative to outdir) to bytes content
    clear_existing: bool
        whether to delete a pre-existing files folder before writing to it

    Returns
    -------
    outpath: str
        path of the main file
    outfilespath: str
        path of the files folder

    """
    # TODO should this be done using an nbconvert writer?
    # e.g. nbconvert.writers.FilesWriter

    # output main file
    outpath = os.path.join(outdir, main_file_name)
    outfilespath = os.path.join(outdir, files_folder)

    logging.info('outputting converted file to: {}'.format(outpath))
    with io.open(outpath, "w", encoding='utf8') as fh:
        fh.write(body)

    # output external files
    if output_external:
        logging.info('dumping external files to: {}'.format(outfilespath))

        if clear_existing and os.path.exists(outfilespath):
            shutil.rmtree(outfilespath)
        # the folder must be (re)created after clearing,
        # otherwise the writes below have nowhere to go
        if not os.path.exists(outfilespath):
            os.makedirs(outfilespath)

        for internal_path, fcontents in internal_files.items():
            target = os.path.join(outdir, internal_path)
            # internal paths may point into sub-folders
            parent = os.path.dirname(target)
            if parent and not os.path.exists(parent):
                os.makedirs(parent)
            with open(target, "wb") as fh:
                fh.write(fcontents)
        for external_path in resources.get('external_file_paths') or []:
            shutil.copyfile(external_path,
                            os.path.join(outfilespath,
                                         os.path.basename(external_path)))

    return outpath, outfilespath


# NOTE(review): ad-hoc manual check; runs a dry run of publish against a
# hard-coded, machine-specific absolute path
if __name__ == "__main__":
    publish("/Users/cjs14/GitHub/ipypublish/example/notebooks/Example.ipynb",
            dry_run=True)