Source code for ipypublish.filters_pandoc.format_label_elements

"""A panflute filter to format element types
that may contain reference labels, i.e. Math, Image and Table.

The :py:mod:`ipypublish.filters_pandoc.prepare_labels` filter should be run
first to access the functionality below:

If the parent of the element is a Span (or a Div for a Table) with a class,
then the label of the element will be span.identifier, and
the attributes and classes from this Span will be used to inform the format.

Additionally, for HTML, if a '$$references' key is available in the metadata,
this will be used to add a suffix to the element captions,
with the number of the element.

Finally, if main() is called with strip_spans = True (the default),
the Span/Div elements with classes labelled-Math/labelled-Image/labelled-Table
will be stripped from the document.
"""

# TODO format headers with section labels
# (see ipysphinx.transforms.CreateNotebookSectionAnchors)
import json
import re

from panflute import Element, Doc, Span, Div, Math, Image, Table  # noqa: F401
import panflute as pf

from ipypublish.filters_pandoc.utils import convert_units, convert_attributes
# NOTE(review): the list of imported names was missing from this copy of the
# file (the parenthesised import was never closed). The three class constants
# below are the ones this module references without defining -- confirm that
# they are exported by prepare_labels.
from ipypublish.filters_pandoc.prepare_labels import (
    LABELLED_IMAGE_CLASS,
    LABELLED_MATH_CLASS,
    LABELLED_TABLE_CLASS,
)

try:
    from textwrap import indent
except ImportError:  # added in python 3.3

    def indent(text, prefix):
        """Fallback for ``textwrap.indent``: prepend ``prefix`` to every line.

        Line endings are preserved (``splitlines(True)``). Unlike the default
        behaviour of ``textwrap.indent``, whitespace-only lines are prefixed
        as well.
        """
        return "".join(prefix + line for line in text.splitlines(True))

# LaTeX figure templates, filled in with ``str.format`` by format_image.
# Both templates receive ``options``, ``size``, ``path`` and ``caption``;
# the labelled variant additionally receives ``label``.
# The caption (and, for labelled figures, the ``\label`` that makes the figure
# referenceable) must appear in the template: ``str.format`` silently ignores
# keyword arguments that the template does not use.
LATEX_FIG_LABELLED = """\\begin{{figure}}[{options}]
\\adjustimage{{max size={{0.9\\linewidth}}{{0.9\\paperheight}},{size}}}{{{path}}}
\\caption{{{caption}}}\\label{{{label}}}
\\end{{figure}}"""  # noqa: E501

LATEX_FIG_UNLABELLED = """\\begin{{figure}}[{options}]
\\adjustimage{{max size={{0.9\\linewidth}}{{0.9\\paperheight}},{size}}}{{{path}}}
\\caption{{{caption}}}
\\end{{figure}}"""  # noqa: E501


def format_math(math, doc):
    # type: (Math, Doc) -> Element
    """Format a display-math element, attaching its reference label if any.

    originally adapted from pandoc-eqnos

    Only ``pf.Math`` elements whose format is ``"DisplayMath"`` are handled.
    If the parent is a ``pf.Span`` carrying ``LABELLED_MATH_CLASS``, the
    span's identifier is used as the label and its ``env`` attribute
    (default ``"equation"``, starred if the span has the ``unnumbered``
    class) as the math environment.

    :param math: the element being visited by the filter
    :param doc: the document; ``doc.format`` selects the output flavour
    :return: a ``pf.RawInline`` for tex/latex and rst output, the list
        produced by ``_wrap_in_anchor`` for html output with a label, and
        ``None`` (leave the element unchanged) in every other case
    """
    if not isinstance(math, pf.Math):
        return None
    if math.format != "DisplayMath":
        return None

    # test if the math text is already wrapped in an environment
    regex = re.compile(
        r"\\begin\{{((?:{0})\*?)\}}(.*)\\end\{{((?:{0})\*?)\}}".format(
            "|".join(MATH_ENVS)
        ),
        re.DOTALL,
    )
    wrap_match = regex.match(math.text)

    env = None
    label = None
    if isinstance(math.parent, pf.Span) and LABELLED_MATH_CLASS in math.parent.classes:
        span = math.parent
        numbered = "*" if "unnumbered" in span.classes else ""
        env = span.attributes.get("env", "equation") + numbered
        label = span.identifier

    if doc.format in ("tex", "latex"):
        if wrap_match:
            # TODO edge case where a label has been specified,
            # but the math is already wrapped
            tex = math.text
        else:
            tex = "\\begin{{{0}}}{1}\\label{{{2}}}\\end{{{0}}}".format(
                env or "equation", math.text, label or ""
            )
        return pf.RawInline(tex, format="tex")

    # a one-element tuple is required here: ``in ("rst")`` would be a
    # substring test against the string "rst" (matching "r", "st", "", ...)
    elif doc.format in ("rst",):
        if env:
            tex = indent("\\begin{{{0}}}{1}\\end{{{0}}}".format(env, math.text), " ")
        else:
            tex = indent(math.text.strip(), " ")
        rst = "\n\n.. math::\n"
        if wrap_match or env:
            # the text carries its own environment, so it must not be wrapped
            rst += " :nowrap:\n"
        if label:
            rst += " :label: {}\n".format(label)
        rst += "\n{}\n\n".format(tex)
        return pf.RawInline(rst, format="rst")

    elif doc.format in ("html", "html5"):
        # new_span = pf.Span(anchor_start, math, anchor_end)
        # TODO add formatting
        # TODO name by count
        if label:
            if not wrap_match:
                math.text = "\\begin{{{0}}}{1}\\end{{{0}}}".format(
                    env or "equation", math.text
                )
            return _wrap_in_anchor(math, label)
        return None

    else:
        return None
def format_image(image, doc):
    # type: (Image, Doc) -> Element
    """Format an image element according to the output format.

    originally adapted from pandoc-fignos

    The identifier and attributes are read from the parent ``pf.Span`` when
    it carries ``LABELLED_IMAGE_CLASS``, and from the image itself otherwise.

    :param image: the element being visited by the filter
    :param doc: the document; ``doc.format`` selects the output flavour
    :return: a ``pf.RawInline`` figure for tex/latex, the (possibly
        modified) image for rst, the image or its anchor-wrapped list for
        html, and ``None`` for non-images and any other format
    """
    if not isinstance(image, pf.Image):
        return None

    parent = image.parent
    labelled = isinstance(parent, pf.Span) and LABELLED_IMAGE_CLASS in parent.classes
    source = parent if labelled else image
    identifier = source.identifier
    attributes = source.attributes
    # classes = source.classes

    target = doc.format

    if target in ("tex", "latex"):
        # render the caption inlines to LaTeX through pandoc itself
        caption_doc = Doc(pf.Para(*image.content))
        caption_doc.api_version = doc.api_version
        caption = (
            pf.run_pandoc(
                json.dumps(caption_doc.to_json()), args=["-f", "json", "-t", "latex"]
            ).strip()
            if image.content
            else ""
        )

        fields = {
            "options": attributes.get("placement", ""),
            "path": image.url,
            "caption": caption,
            "size": "",  # max width set as 0.9\linewidth
        }
        if "width" in attributes:
            fraction = convert_units(attributes["width"], "fraction")
            fields["size"] = "width={0}\\linewidth".format(fraction)
        elif "height" in attributes:
            fraction = convert_units(attributes["height"], "fraction")
            fields["size"] = "height={0}\\paperheight".format(fraction)

        if identifier:
            latex = LATEX_FIG_LABELLED.format(label=identifier, **fields)
        else:
            latex = LATEX_FIG_UNLABELLED.format(**fields)
        return pf.RawInline(latex, format="tex")

    if target in ("rst",):
        if not image.content.list:
            # If the container is empty, then pandoc will assign an iterative
            # reference identifier to it (image0, image1).
            # However, this iterator restarts for each markdown cell,
            # which can lead to reference clashes.
            # Therefore we specifically assign the identifier here, as its url
            # TODO does this identifier need to be sanitized?
            # (it works fine in the tests)
            identifier = image.url
            image.content = pf.ListContainer(pf.Str(str(identifier)))
        # TODO formatting and span identifier (convert width/height to %)
        return image

    if target in ("html", "html5"):
        # TODO formatting, name by count
        if not identifier:
            return image
        return _wrap_in_anchor(image, identifier)

    return None
def format_table(table, doc):
    # type: (Table, Doc) -> Element
    """Format a labelled table according to the output format.

    originally adapted from pandoc-tablenos

    Only tables whose parent is a ``pf.Div`` carrying
    ``LABELLED_TABLE_CLASS`` are handled. The div's ``align`` and ``widths``
    attributes, when present, are validated and applied to the table.

    :param table: the element being visited by the filter
    :param doc: the document; ``doc.format`` selects the output flavour
    :return: the table (tex/latex), a list of a reference target paragraph
        followed by the table or its raw rst rendering (rst), the
        anchor-wrapped list (html), or ``None`` otherwise
    :raises ValueError: if ``align`` contains characters other than l, r, c,
        or ``widths`` cannot be converted to a list of floats
    """
    if not isinstance(table, pf.Table):
        return None

    parent = table.parent
    if not (isinstance(parent, pf.Div) and LABELLED_TABLE_CLASS in parent.classes):
        return None
    div = parent  # type: pf.Div

    attributes = convert_attributes(div.attributes)

    if "align" in div.attributes:
        align_text = attributes["align"]
        alignment_names = {"l": "AlignLeft", "r": "AlignRight", "c": "AlignCenter"}
        align = [alignment_names.get(letter, None) for letter in align_text]
        if None in align:
            raise ValueError(
                "table '{0}' alignment must contain only l,r,c:"
                " {1}".format(div.identifier, align_text)
            )
        table.alignment = align
        attributes["align"] = align

    if "widths" in div.attributes:
        raw_widths = attributes["widths"]
        try:
            widths = [float(w) for w in raw_widths]
        except Exception:
            raise ValueError(
                "table '{0}' widths must be a list of numbers:"
                " {1}".format(div.identifier, raw_widths)
            )
        table.width = widths
        attributes["widths"] = widths

    if doc.format in ("tex", "latex"):
        # TODO placement
        label = pf.RawInline("\\label{{{0}}}".format(div.identifier), format="tex")
        table.caption.append(label)
        return table

    if doc.format in ("rst",):
        # explicit rst hyperlink target placed in front of the table
        target = pf.Para(
            pf.RawInline(".. _`{0}`:".format(div.identifier), format="rst")
        )
        if not attributes:
            return [target, table]

        # pandoc 2.6 doesn't output table options
        tbl_doc = pf.Doc(table)
        tbl_doc.api_version = doc.api_version
        tbl_str = pf.convert_text(
            tbl_doc, input_format="panflute", output_format="rst"
        )
        tbl_lines = tbl_str.splitlines()
        if tbl_lines[1].strip() == "":
            tbl_lines.insert(1, " :align: center")
            if "widths" in attributes:
                # in rst widths must be integers
                widths = " ".join(str(int(w * 10)) for w in table.width)
                tbl_lines.insert(1, " :widths: {}".format(widths))
        # TODO rst column alignment
        return [
            target,
            pf.RawBlock("\n".join(tbl_lines) + "\n\n", format=doc.format),
        ]

    if doc.format in ("html", "html5"):
        # TODO formatting, name by count
        return _wrap_in_anchor(table, div.identifier, inline=False)

    return None
def strip_labelled_spans(element, doc):
    # type: (Span, Doc) -> Element
    """Replace a labelled Span/Div wrapper by its own content.

    A ``pf.Span`` is unwrapped when it has the labelled image or math class,
    a ``pf.Div`` when it has the labelled table class.

    :return: the wrapper's content as a list, or ``None`` to leave the
        element unchanged
    """
    if isinstance(element, pf.Span):
        wanted = {LABELLED_IMAGE_CLASS, LABELLED_MATH_CLASS}
    elif isinstance(element, pf.Div):
        wanted = {LABELLED_TABLE_CLASS}
    else:
        return None

    if wanted.intersection(element.classes):
        return list(element.content)
    return None
def _wrap_in_anchor(element, label, inline=True):
    """Surround ``element`` with raw html ``<a>`` open/close tags.

    The opening tag carries ``label`` as its id. Raw inline elements are
    used when ``inline`` is true and raw block elements otherwise, so both
    inline and block elements can be wrapped.

    :return: the list ``[opening tag, element, closing tag]``
    """
    raw_type = pf.RawInline if inline else pf.RawBlock
    opening = raw_type(
        '<a id="{0}" class="anchor-link" name="#{0}">'.format(label), format="html"
    )
    closing = raw_type("</a>", format="html")
    return [opening, element, closing]
def prepare(doc):
    # type: (Doc) -> None
    """Filter set-up hook passed to ``pf.run_filters``; intentionally a no-op."""
    return None
def finalize(doc):
    # type: (Doc) -> None
    """Filter tear-down hook passed to ``pf.run_filters``; intentionally a no-op."""
    return None
def main(doc=None, strip_spans=True):
    # type: (Doc, bool) -> None
    """Run the formatting filters over ``doc`` with ``pf.run_filters``.

    :param doc: the document to filter; passed through to ``pf.run_filters``
    :param strip_spans: if true, also run ``strip_labelled_spans`` after
        the formatting filters
    :return: whatever ``pf.run_filters`` returns
    """
    actions = [format_math, format_image, format_table]
    if strip_spans:
        actions = actions + [strip_labelled_spans]
    return pf.run_filters(actions, prepare, finalize, doc=doc)
# Run as a pandoc filter when executed as a script.
if __name__ == "__main__":
    main()