import copy
import io
import logging
import os
import bibtexparser
from ipypublish.bib2glossary.definitions import (
ETYPE_GLOSS,
ETYPE_ACRONYM,
ETYPE_SYMBOL,
NEWGLOSS_FIELDS,
NEWACRONYM_FIELDS,
)
try:
from collections.abc import MutableMapping
except ImportError:
from collections import MutableMapping
# Module-level logger; the loaders below use it to warn about skipped entries.
logger = logging.getLogger(__name__)
class BibGlossEntry(object):
    """A single glossary term, acronym or symbol, wrapping a bib-style dict.

    Parameters
    ----------
    entry_dict: dict
        must contain "ID" and "ENTRYTYPE" (one of the allowed types),
        plus "abbreviation" and "longname" for acronyms, or
        "name" and "description" for glossary terms and symbols

    Raises
    ------
    KeyError
        if a required field is missing
    TypeError
        if the "ENTRYTYPE" is not one of the allowed types
    """

    _allowed_types = (ETYPE_GLOSS, ETYPE_ACRONYM, ETYPE_SYMBOL)

    def __init__(self, entry_dict):
        self._validate_dict(entry_dict)
        # NB: the dict is stored by reference (not copied), so setting
        # ``key`` on this object also changes the caller's dict
        self._entry_dict = entry_dict

    def _validate_dict(self, dct):
        """Check that ``dct`` holds every field required for its entry type.

        Raises
        ------
        KeyError
            if "ID", "ENTRYTYPE" or a type specific field is missing
        TypeError
            if the "ENTRYTYPE" is not one of the allowed types
        """
        for field in ("ID", "ENTRYTYPE"):
            if field not in dct:
                raise KeyError(
                    "the entry is missing the required field: {}".format(field)
                )

        etype = dct["ENTRYTYPE"]
        if etype not in self._allowed_types:
            raise TypeError("ENTRYTYPE must be one of: {}".format(self._allowed_types))

        if etype == ETYPE_ACRONYM:
            required = ("abbreviation", "longname")
        else:
            # glossary terms and symbols are the only other allowed types
            required = ("name", "description")
        missing = [field for field in required if field not in dct]
        if missing:
            raise KeyError(
                "the {0} entry is missing the required field(s): {1}".format(
                    etype, ", ".join(missing)
                )
            )

    @property
    def key(self):
        """The unique identifier of the entry (its "ID" field)."""
        return self._entry_dict["ID"]

    @key.setter
    def key(self, key):
        self._entry_dict["ID"] = key

    @property
    def type(self):
        """The entry type (its "ENTRYTYPE" field)."""
        return self._entry_dict["ENTRYTYPE"]

    def __contains__(self, key):
        return key in self._entry_dict

    def get(self, key):
        """Return the value of a field.

        Unlike ``dict.get`` there is no default:
        a KeyError is raised if the field is not present.
        """
        return self._entry_dict[key]

    @property
    def label(self):
        """The short form of the entry:
        the abbreviation of an acronym, otherwise the name."""
        if self.type == ETYPE_ACRONYM:
            return self.get("abbreviation")
        if self.type in (ETYPE_GLOSS, ETYPE_SYMBOL):
            return self.get("name")
        raise NotImplementedError

    @property
    def sortkey(self):
        """The "sort" field if present, otherwise the lower-cased label."""
        if "sort" in self:
            return self.get("sort")
        return self.label.lower()

    @property
    def plural(self):
        """The "plural" field if present, otherwise the label suffixed by 's'."""
        if "plural" in self:
            return self.get("plural")
        return "{}s".format(self.label)

    @property
    def text(self):
        """The long form of the entry:
        the long name of an acronym, otherwise the description."""
        if self.type == ETYPE_ACRONYM:
            return self.get("longname")
        if self.type in (ETYPE_GLOSS, ETYPE_SYMBOL):
            return self.get("description")
        raise NotImplementedError

    def __repr__(self):
        return "BibGlossEntry(key={0},label={1})".format(self.key, self.label)

    def to_dict(self):
        """Return a deep copy of the underlying entry dict."""
        return copy.deepcopy(self._entry_dict)

    def _latex_options(self, fields):
        """Return ``field={value}`` strings for each of ``fields``
        present in the entry, ordered by field name."""
        return [
            "{0}={{{1}}}".format(field, self.get(field))
            for field in sorted(fields)
            if field in self
        ]

    def to_latex(self):
        """Return the latex command defining this entry.

        Glossary terms and symbols give a ``newglossaryentry`` command
        (symbols with the additional option ``type={symbols}``),
        acronyms give a ``newacronym`` command.

        Raises
        ------
        NotImplementedError
            if the entry type is not handled
            (consistent with ``label`` and ``text``)
        """
        if self.type in (ETYPE_GLOSS, ETYPE_SYMBOL):
            options = self._latex_options(NEWGLOSS_FIELDS)
            if self.type == ETYPE_SYMBOL:
                options.append("type={symbols}")
            body = "{{{key}}}{{\n {options}\n}}".format(
                key=self.key, options=",\n ".join(options)
            )
            return "\\newglossaryentry" + body

        if self.type == ETYPE_ACRONYM:
            body = "{{{key}}}{{{abbrev}}}{{{long}}}".format(
                key=self.key, abbrev=self.label, long=self.text
            )
            options = self._latex_options(NEWACRONYM_FIELDS)
            if options:
                # optional arguments precede the mandatory ones
                body = "[" + ",".join(options) + "]" + body
            return "\\newacronym" + body

        # previously fell through and returned None, which only failed
        # later, when the result was split or joined
        raise NotImplementedError
class BibGlossDB(MutableMapping):
    """A mutable mapping of entry keys to ``BibGlossEntry`` objects,
    which can be loaded from bib or tex files and written out as
    a bib string or as latex commands.
    """

    # file extensions, in the order that ``guess_path`` probes for them
    _BIB_EXTENSIONS = (".bib", ".bibtex", ".biblatex")
    _TEX_EXTENSIONS = (".tex", ".latex")

    def __init__(self):
        self._entries = {}

    def __getitem__(self, key):
        return self._entries[key]

    def __setitem__(self, key, entry):
        """Store an entry; it must be a BibGlossEntry filed under its own key."""
        if not isinstance(entry, BibGlossEntry):
            raise ValueError("value must be a BibGlossEntry")
        if key != entry.key:
            raise ValueError("key must equal entry.key")
        self._entries[key] = entry

    def __delitem__(self, key):
        del self._entries[key]

    def __iter__(self):
        return iter(self._entries)

    def __len__(self):
        return len(self._entries)

    @staticmethod
    def get_fake_entry_obj(key):
        """Return a placeholder glossary entry,
        named after ``key`` and with an empty description."""
        return BibGlossEntry(
            {"ENTRYTYPE": ETYPE_GLOSS, "ID": key, "name": key, "description": ""}
        )

    def load_bib(
        self,
        text_str=None,
        path=None,
        bibdb=None,
        encoding="utf8",
        ignore_nongloss_types=False,
        ignore_duplicates=False,
    ):
        """load a bib file, replacing any entries currently held

        Exactly one of text_str, path or bibdb must be supplied.

        Parameters
        ----------
        text_str=None: str or None
            string representing the bib file contents
        path=None: str or None
            path to bibfile
        bibdb=None: bibtexparser.bibdatabase.BibDatabase or None
        encoding="utf8": str
            bib file encoding
        ignore_nongloss_types: bool
            if False, a TypeError will be raised for non-gloss types,
            otherwise they are skipped (with a logged warning)
        ignore_duplicates: bool
            if False, a KeyError will be raised if multiple entries are found
            with the same key, otherwise only the first entry will be used

        Returns
        -------
        bool:
            True

        Raises
        ------
        ValueError
            if zero or multiple sources are supplied,
            or bibdb is not a BibDatabase
        KeyError
            if an entry is missing a required field, or for duplicate keys
        """
        supplied = [src for src in (text_str, path, bibdb) if src is not None]
        if len(supplied) != 1:
            raise ValueError("only one of text_str, path or bib must be supplied")

        if bibdb is not None:
            if not isinstance(bibdb, bibtexparser.bibdatabase.BibDatabase):
                raise ValueError("bib is not a BibDatabase instance")
            bib = bibdb
        else:
            if path is not None:
                with io.open(path, encoding=encoding) as fobj:
                    text_str = fobj.read()
            parser = bibtexparser.bparser.BibTexParser()
            # glossary entry types are non-standard, so must not be dropped
            parser.ignore_nonstandard_types = False
            parser.encoding = encoding
            bib = parser.parse(text_str)
            # TODO doesn't appear to check for key duplication
            # see https://github.com/sciunto-org/python-bibtexparser/issues/237

        entries = {}
        for entry_dict in bib.entries:
            try:
                entry = BibGlossEntry(entry_dict)
            except TypeError:
                # raised for an ENTRYTYPE that is not a glossary type
                if ignore_nongloss_types:
                    logger.warning("Skipping non-glossary entry")
                    continue
                raise
            if entry.key in entries:
                if ignore_duplicates:
                    logger.warning("Skipping duplicate key entry")
                    continue
                raise KeyError(
                    "the bib file contains "
                    "multiple entries with the key: {}".format(entry.key)
                )
            entries[entry.key] = entry

        # only replace the current entries once everything has loaded
        self._entries = entries
        return True

    def load_tex(
        self,
        text_str=None,
        path=None,
        encoding="utf8",
        skip_ioerrors=False,
        ignore_unknown_types=True,
    ):
        """load a tex file, replacing any entries currently held

        Parameters
        ----------
        text_str=None: str or None
            string representing the tex file contents
        path=None: str or None
            path to the tex file
        encoding="utf8": str
            tex file encoding
        skip_ioerrors: bool
            if False, an IOError will be raised if a glossary term
            or acronym definition is badly formatted
            (the flag is forwarded to ``parse_tex``)
        ignore_unknown_types: bool
            if True, strip unknown types, otherwise raise a ValueError

        Returns
        -------
        bool:
            True

        Notes
        -----
        the texsoup package is required.

        if a glossary term has field 'type={symbols}', then
        it will be loaded as a symbol

        """
        # imported here, since it requires the optional texsoup package
        from ipypublish.bib2glossary.parse_tex import parse_tex

        gterms, acronyms = parse_tex(
            text_str=text_str, path=path, encoding=encoding, skip_ioerrors=skip_ioerrors
        )

        entries = {}
        for key, fields in gterms.items():
            fields["ENTRYTYPE"] = ETYPE_GLOSS
            if fields.get("type", None) == "symbols":
                fields["ENTRYTYPE"] = ETYPE_SYMBOL
                fields.pop("type")
            elif "type" in fields:
                if not ignore_unknown_types:
                    raise ValueError(
                        "the 'type' is not recognised: " "{}".format(fields["type"])
                    )
                fields.pop("type")
            fields["ID"] = key
            entry = BibGlossEntry(fields)
            entries[entry.key] = entry

        for key, fields in acronyms.items():
            fields["ENTRYTYPE"] = ETYPE_ACRONYM
            fields["ID"] = key
            entry = BibGlossEntry(fields)
            entries[entry.key] = entry

        self._entries = entries
        return True

    @staticmethod
    def guess_path(path):
        """guess the path of a bib file, with or without a file extension,
        from the available files in the path folder

        Returns
        -------
        path or None:
            ``path`` unchanged if it already has a known bib or tex extension
            (the file is not checked for existence), otherwise the first
            existing file made from the path stem plus a known extension,
            or None if there is no such file
        """
        known_extensions = BibGlossDB._BIB_EXTENSIONS + BibGlossDB._TEX_EXTENSIONS
        basepath, extension = os.path.splitext(str(path))
        if extension in known_extensions:
            return path
        for candidate_ext in known_extensions:
            candidate = basepath + candidate_ext
            if os.path.exists(candidate):
                return candidate
        return None

    def load(self, path, encoding="utf8"):
        """load a file, the type will be guessed from the extension,
        or (if no extension is given), the available files in the path folder

        Parameters
        ----------
        path: str
        encoding='utf8': str
            encoding of the file

        Raises
        ------
        IOError
            if no file with a known bib or tex extension can be found
        """
        # keep ``path`` intact, so the error reports what the caller supplied
        # (it was previously overwritten, and so always reported as None)
        resolved = self.guess_path(path)
        if resolved is None:
            raise IOError("no acceptable loader found for path: {}".format(path))
        extension = os.path.splitext(str(resolved))[1]
        if extension in self._BIB_EXTENSIONS:
            self.load_bib(path=resolved, encoding=encoding)
        elif extension in self._TEX_EXTENSIONS:
            self.load_tex(path=resolved, encoding=encoding)

    def to_dict(self):
        """Return a dict of {<key>: <copy of the entry dict>}."""
        return {k: e.to_dict() for k, e in self.items()}

    def to_bib_string(self):
        """Return the entries as a string in bib file format."""
        bibdb = bibtexparser.bibdatabase.BibDatabase()
        bibdb.entries = [e.to_dict() for e in self.values()]
        writer = bibtexparser.bwriter.BibTexWriter()
        writer.contents = ["comments", "entries"]
        writer.indent = " "
        # writer.order_entries_by = ('ENTRYTYPE', 'ID')
        return writer.write(bibdb)

    def to_latex_dict(self, splitlines=True):
        """convert to dict of latex strings

        Parameters
        ----------
        splitlines=True: bool
            if True, each latex string is split into a list of lines

        Returns
        -------
        dict:
            {(<type>, <key>): <latex string>}

        """
        latex_strings = {}
        for entry in self.values():
            string = entry.to_latex()
            if splitlines:
                string = string.splitlines()
            latex_strings[(entry.type, entry.key)] = string
        return latex_strings

    def to_latex_string(self):
        """Return the latex commands for all entries, one after another,
        ordered by entry type and then by key."""
        latex_dict = self.to_latex_dict(splitlines=False)
        return "\n".join(latex_dict[key] for key in sorted(latex_dict))