# Source code for ipypublish.filters_pandoc.format_label_elements
""" a panflute filter to format elements types
that may contain reference labels, i.e. Math, Image and Table.
The :py:mod:`ipypublish.filters_pandoc.prepare_labels` filter should be run
first to access the functionality below:
If the parent of the element is a Span (or Div for Table), with a class
labelled-Math/labelled-Image/labelled-Table,
then the label of the element will be span.identifier, and
the attributes and classes from this Span will be used to inform the format.
Additionally, for HTML, if a '$$references' key is available in the metadata,
this will be used to add a suffix to the element captions,
with the number of the element.
Finally, if main() is called with strip_spans = True (the default),
the Span/Div elements with classes labelled-Math/labelled-Image/labelled-Table
will be stripped from the document.
"""
# TODO format headers with section labels
# (see ipysphinx.transforms.CreateNotebookSectionAnchors)
import json
import re
from panflute import Element, Doc, Span, Div, Math, Image, Table # noqa: F401
import panflute as pf
from ipypublish.filters_pandoc.utils import convert_units, convert_attributes
from ipypublish.filters_pandoc.prepare_labels import (
LABELLED_IMAGE_CLASS,
LABELLED_MATH_CLASS,
LABELLED_TABLE_CLASS,
)
try:
    from textwrap import indent
except ImportError:  # added in python 3.3

    def indent(text, prefix):
        """Fallback for ``textwrap.indent`` on interpreters that lack it.

        Adds ``prefix`` to the start of every line of ``text``. As with the
        default behaviour of ``textwrap.indent``, lines consisting solely of
        whitespace are left untouched, so the output is the same whichever
        implementation ends up being used.
        """
        return "".join(
            prefix + line if line.strip() else line
            for line in text.splitlines(True)
        )
# LaTeX figure template used by ``format_image`` when the image has an
# identifier. It is filled with ``str.format`` using the fields ``options``,
# ``label``, ``size``, ``path`` and ``caption``; doubled braces are literal
# braces in the emitted LaTeX. The label is written both as a
# ``\hypertarget`` and as a ``\label``.
LATEX_FIG_LABELLED = """\\begin{{figure}}[{options}]
\\hypertarget{{{label}}}{{%
\\begin{{center}}
\\adjustimage{{max size={{0.9\\linewidth}}{{0.9\\paperheight}},{size}}}{{{path}}}
\\end{{center}}
\\caption{{{caption}}}\\label{{{label}}}
}}
\\end{{figure}}""" # noqa: E501
# LaTeX figure template used by ``format_image`` when the image has no
# identifier. Same fields as above, minus ``label``.
LATEX_FIG_UNLABELLED = """\\begin{{figure}}[{options}]
\\begin{{center}}
\\adjustimage{{max size={{0.9\\linewidth}}{{0.9\\paperheight}},{size}}}{{{path}}}
\\end{{center}}
\\caption{{{caption}}}
\\end{{figure}}""" # noqa: E501
# Names of the LaTeX math environments that ``format_math`` recognises
# (optionally followed by ``*``) when testing whether the math text is
# already wrapped in a ``\begin{...}...\end{...}`` pair.
MATH_ENVS = (
"equation",
"align",
"alignat",
"eqnarray",
"multline",
"gather",
"flalign",
"dmath",
)
def format_math(math, doc):
    # type: (Math, Doc) -> Element
    """Format a display-math element for the output format of ``doc``.

    originally adapted from:
    `pandoc-eqnos <https://github.com/tomduck/pandoc-eqnos/>`_

    If the parent of ``math`` is a Span carrying ``LABELLED_MATH_CLASS``,
    the span identifier is used as the equation label, its ``env`` attribute
    (default ``equation``) as the environment, and an ``unnumbered`` class
    selects the starred variant of that environment.

    :param math: the element being visited by the filter
    :param doc: the document; ``doc.format`` selects the output branch
    :return: a ``RawInline`` for tex/latex and rst, a list
        ``[anchor, math, anchor]`` for labelled html math, or ``None``
        (leave the element unchanged) in every other case
    """
    if not isinstance(math, pf.Math):
        return None
    if math.format != "DisplayMath":
        return None

    # test if the math text is already wrapped in an environment
    regex = re.compile(
        r"\\begin\{{((?:{0})\*?)\}}(.*)\\end\{{((?:{0})\*?)\}}".format(
            "|".join(MATH_ENVS)
        ),
        re.DOTALL,
    )
    wrap_match = regex.match(math.text)

    env = None
    label = None
    if isinstance(math.parent, pf.Span) and LABELLED_MATH_CLASS in math.parent.classes:
        span = math.parent
        numbered = "*" if "unnumbered" in span.classes else ""
        env = span.attributes.get("env", "equation") + numbered
        label = span.identifier

    if doc.format in ("tex", "latex"):
        if wrap_match:
            # TODO edge case where a label has been specified, but the math is already wrapped
            tex = math.text
        else:
            # only emit \label when there is a label: an empty \label{} on
            # every unlabelled equation would define the same (empty) label
            # over and over again
            label_tex = "\\label{{{0}}}".format(label) if label else ""
            tex = "\\begin{{{0}}}{1}{2}\\end{{{0}}}".format(
                env or "equation", math.text, label_tex
            )
        return pf.RawInline(tex, format="tex")

    # NOTE: this must be a tuple; ``in ("rst")`` is a substring test against
    # the string "rst" and would also accept formats such as "r", "st" or ""
    if doc.format in ("rst",):
        if env:
            tex = indent("\\begin{{{0}}}{1}\\end{{{0}}}".format(env, math.text), " ")
        else:
            tex = indent(math.text.strip(), " ")
        rst = "\n\n.. math::\n"
        if wrap_match or env:
            rst += " :nowrap:\n"
        if label:
            rst += " :label: {}\n".format(label)
        rst += "\n{}\n\n".format(tex)
        return pf.RawInline(rst, format="rst")

    if doc.format in ("html", "html5"):
        # new_span = pf.Span(anchor_start, math, anchor_end)
        # TODO add formatting
        # TODO name by count
        if label:
            if not wrap_match:
                math.text = "\\begin{{{0}}}{1}\\end{{{0}}}".format(
                    env or "equation", math.text
                )
            return _wrap_in_anchor(math, label)

    return None
def format_image(image, doc):
    # type: (Image, Doc) -> Element
    """Format an image element for the output format of ``doc``.

    originally adapted from:
    `pandoc-fignos <https://github.com/tomduck/pandoc-fignos/>`_

    The identifier and attributes are taken from the parent Span when it
    carries ``LABELLED_IMAGE_CLASS``, otherwise from the image itself.

    :param image: the element being visited by the filter
    :param doc: the document; ``doc.format`` selects the output branch
    :return: a ``RawInline`` figure for tex/latex, the image for rst,
        the image (wrapped in anchors when it has an identifier) for html,
        ``None`` for non-images and all other formats
    """
    if not isinstance(image, pf.Image):
        return None

    parent = image.parent
    has_label_span = (
        isinstance(parent, pf.Span) and LABELLED_IMAGE_CLASS in parent.classes
    )
    # classes of the chosen source are currently unused
    source = parent if has_label_span else image
    identifier = source.identifier
    attributes = source.attributes

    target = doc.format

    if target in ("tex", "latex"):
        # the caption is rendered by running the image content through pandoc
        caption_doc = Doc(pf.Para(*image.content))
        caption_doc.api_version = doc.api_version
        caption = ""
        if image.content:
            caption = pf.run_pandoc(
                json.dumps(caption_doc.to_json()), args=["-f", "json", "-t", "latex"]
            ).strip()

        # width takes precedence over height; with neither, only the
        # template's max size (0.9\linewidth) applies
        size = ""
        for key, template in (
            ("width", "width={0}\\linewidth"),
            ("height", "height={0}\\paperheight"),
        ):
            if key in attributes:
                size = template.format(convert_units(attributes[key], "fraction"))
                break

        fields = dict(
            options=attributes.get("placement", ""),
            path=image.url,
            caption=caption,
            size=size,
        )
        if identifier:
            latex = LATEX_FIG_LABELLED.format(label=identifier, **fields)
        else:
            latex = LATEX_FIG_UNLABELLED.format(**fields)
        return pf.RawInline(latex, format="tex")

    if target in ("rst",):
        if not image.content.list:
            # If the container is empty, then pandoc will assign an iterative
            # reference identifier to it (image0, image1).
            # However, this iterator restarts for each markdown cell,
            # which can lead to reference clashes.
            # Therefore we specifically assign the identifier here, as its url
            # TODO does this identifier need to be sanitized?
            # (it works fine in the tests)
            identifier = image.url
            image.content = pf.ListContainer(pf.Str(str(identifier)))
        # TODO formatting and span identifier (convert width/height to %)
        return image

    if target in ("html", "html5"):
        # TODO formatting, name by count
        return _wrap_in_anchor(image, identifier) if identifier else image

    return None
def format_table(table, doc):
    # type: (Table, Doc) -> Element
    """Format a labelled table for the output format of ``doc``.

    originally adapted from:
    `pandoc-tablenos <https://github.com/tomduck/pandoc-tablenos>`_

    Only tables whose parent is a Div carrying ``LABELLED_TABLE_CLASS`` are
    handled. The Div's ``align`` attribute (a sequence of ``l``/``r``/``c``)
    and ``widths`` attribute (a list of numbers) are applied to the table.

    :param table: the element being visited by the filter
    :param doc: the document; ``doc.format`` selects the output branch
    :return: the table for tex/latex, a list of blocks for rst and html,
        ``None`` for anything else (including unlabelled tables)
    :raises ValueError: if ``align`` contains characters other than l, r, c,
        or ``widths`` cannot be converted to a list of floats
    """
    if not isinstance(table, pf.Table):
        return None

    div = None  # type: pf.Div
    if (
        isinstance(table.parent, pf.Div)
        and LABELLED_TABLE_CLASS in table.parent.classes
    ):
        div = table.parent
    if div is None:
        return None

    attributes = convert_attributes(div.attributes)

    if "align" in div.attributes:
        align_text = attributes["align"]
        align = [
            {"l": "AlignLeft", "r": "AlignRight", "c": "AlignCenter"}.get(a, None)
            for a in align_text
        ]
        if None in align:
            raise ValueError(
                "table '{0}' alignment must contain only l,r,c:"
                " {1}".format(div.identifier, align_text)
            )
        table.alignment = align
        attributes["align"] = align

    if "widths" in div.attributes:
        widths = attributes["widths"]
        try:
            widths = [float(w) for w in widths]
        except Exception:
            raise ValueError(
                "table '{0}' widths must be a list of numbers:"
                " {1}".format(div.identifier, widths)
            )
        table.width = widths
        attributes["widths"] = widths

    if doc.format in ("tex", "latex"):
        # TODO placement
        # skip the label when the Div has no identifier, rather than
        # emitting an empty \label{}
        if div.identifier:
            table.caption.append(
                pf.RawInline("\\label{{{0}}}".format(div.identifier), format="tex")
            )
        return table

    if doc.format in ("rst",):
        # pandoc 2.6 doesn't output table options
        if attributes:
            tbl_doc = pf.Doc(table)
            tbl_doc.api_version = doc.api_version
            tbl_str = pf.convert_text(
                tbl_doc, input_format="panflute", output_format="rst"
            )
            tbl_lines = tbl_str.splitlines()
            # options are only inserted when the first line is followed by a
            # blank line; the length check avoids an IndexError when the
            # conversion returns fewer than two lines
            if len(tbl_lines) > 1 and tbl_lines[1].strip() == "":
                tbl_lines.insert(1, " :align: center")
                if "widths" in attributes:
                    # in rst widths must be integers
                    widths = " ".join([str(int(w * 10)) for w in table.width])
                    tbl_lines.insert(1, " :widths: {}".format(widths))
            # TODO rst column alignment, see
            # https://cloud-sptheme.readthedocs.io/en/latest/lib/cloud_sptheme.ext.table_styling.html
            return [
                pf.Para(
                    pf.RawInline(".. _`{0}`:".format(div.identifier), format="rst")
                ),
                pf.RawBlock("\n".join(tbl_lines) + "\n\n", format=doc.format),
            ]
        return [
            pf.Para(pf.RawInline(".. _`{0}`:".format(div.identifier), format="rst")),
            table,
        ]

    if doc.format in ("html", "html5"):
        # TODO formatting, name by count
        return _wrap_in_anchor(table, div.identifier, inline=False)

    return None
def strip_labelled_spans(element, doc):
    # type: (Span, Doc) -> Element
    """Replace a labelling wrapper element with its own content.

    A Span carrying ``LABELLED_IMAGE_CLASS`` or ``LABELLED_MATH_CLASS``, or a
    Div carrying ``LABELLED_TABLE_CLASS``, is replaced by the list of its
    children. Any other element yields ``None`` (left unchanged).
    """
    if isinstance(element, pf.Span):
        wrapper_classes = {LABELLED_IMAGE_CLASS, LABELLED_MATH_CLASS}
    elif isinstance(element, pf.Div):
        wrapper_classes = {LABELLED_TABLE_CLASS}
    else:
        return None

    if wrapper_classes.isdisjoint(element.classes):
        return None
    return list(element.content)
def _wrap_in_anchor(element, label, inline=True):
    """Surround ``element`` with a raw html ``<a>`` open/close pair.

    according to https://stackoverflow.com/a/1828032/5033292
    html5 allows both inline and block elements inside an anchor

    :param element: the element to wrap
    :param label: used for the anchor's ``id`` and (prefixed with ``#``)
        its ``name``
    :param inline: emit ``RawInline`` anchors if True, ``RawBlock`` otherwise
    :return: the list ``[opening anchor, element, closing anchor]``
    """
    raw_type = pf.RawInline if inline else pf.RawBlock
    opening = '<a id="{0}" class="anchor-link" name="#{0}">'.format(label)
    return [
        raw_type(opening, format="html"),
        element,
        raw_type("</a>", format="html"),
    ]
def main(doc=None, strip_spans=True):
    # type: (Doc, bool) -> None
    """Run the formatting filters over a document.

    ``format_math``, ``format_image`` and ``format_table`` are always applied;
    ``strip_labelled_spans`` is appended when ``strip_spans`` is True.

    :param doc: the document to filter, passed through to
        ``panflute.run_filters``
    :param strip_spans: whether to strip the labelled Span/Div wrappers
    :return: whatever ``panflute.run_filters`` returns
    """
    to_run = [format_math, format_image, format_table]
    if strip_spans:
        to_run.append(strip_labelled_spans)
    # No ``prepare``/``finalize`` hooks are defined or imported in this
    # module, so passing those names raised NameError; run_filters defaults
    # both to None.
    return pf.run_filters(to_run, doc=doc)


if __name__ == "__main__":
    main()