--- a +++ b/docs/scripts/bibtex.py @@ -0,0 +1,292 @@ +# Based on https://github.com/darwindarak/mdx_bib +import re +import string +from collections import Counter, OrderedDict +from typing import Tuple +from xml.etree import ElementTree as etree +from xml.etree.ElementTree import tostring as etree_to_string + +from markdown.extensions import Extension +from markdown.inlinepatterns import Pattern +from markdown.preprocessors import Preprocessor +from mkdocs.config.config_options import Type as MkType +from mkdocs.plugins import BasePlugin +from pybtex.database.input import bibtex +from pybtex.exceptions import PybtexError + +BRACKET_RE = re.compile(r"\[([^\[]+)\]") +CITE_RE = re.compile(r"@([\w_:-]+)") +DEF_RE = re.compile(r"\A {0,3}\[@([\w_:-]+)\]:\s*(.*)") +INDENT_RE = re.compile(r"\A\t| {4}(.*)") + +CITATION_RE = r"(\[@(?:[\w_:-]+)(?: *, *@(?:[\w_:-]+))*\])" + + +class Bibliography(object): + """Keep track of document references and citations for exporting""" + + def __init__(self, extension, plugin, bibtex_file, order): + self.extension = extension + self.order = order + self.plugin = plugin + + self.citations = OrderedDict() + self.references = dict() + + if bibtex_file: + try: + parser = bibtex.Parser() + self.bibsource = parser.parse_file(bibtex_file).entries + self.labels = { + id: self.formatCitation(self.bibsource[id]) + for id in self.bibsource.keys() + } + for value, occurrences in Counter(self.labels.values()).items(): + if occurrences > 1: + for xkey, xvalue in self.labels.items(): + i = 0 + if xvalue == value: + self.labels[ + xkey + ] = f"{xvalue}{string.ascii_lowercase[i]}" + i += 1 + + except PybtexError: + print("Error loading bibtex file") + self.bibsource = dict() + self.labels = {} + else: + self.bibsource = dict() + + def addCitation(self, citekey): + self.citations[citekey] = self.citations.get(citekey, 0) + 1 + + def setReference(self, citekey, reference): + self.references[citekey] = reference + + def citationID(self, citekey): + return "cite-" + citekey + + def referenceID(self, citekey): + return "ref-" + citekey + + def formatAuthor(self, author): + out = ( + author.last_names[0] + + ((" " + author.first_names[0][0]) if author.first_names else "") + + "." + ) + if author.middle_names: + out += f"{author.middle_names[0][0]}." + return out.replace("{", "").replace("}", "") + + def formatAuthorSurname(self, author): + out = author.last_names[0] + return out.replace("{", "").replace("}", "") + + def formatReference(self, ref): + author_list = list(map(self.formatAuthor, ref.persons["author"])) + + if len(author_list) == 1: + authors = author_list[0] + else: + authors = ", ".join(author_list[:-1]) + authors += f" and {author_list[-1]}" + + # Harvard style + # Surname, Initial, ... and Last_Surname, + # Initial, Year. Title. Journal, Volume(Issue), pages. doi. + + title = ref.fields["title"].replace("{", "").replace("}", "") + journal = ref.fields.get("journal", "") + volume = ref.fields.get("volume", "") + issue = ref.fields.get("issue", "") + year = ref.fields.get("year") + pages = ref.fields.get("pages") + doi = ref.fields.get("doi") + url = ref.fields.get("url") + + ref_id = self.referenceID(ref.key) + ref = f"<p id={repr(ref_id)}>{authors}, {year}. {title}." + if journal: + ref += f" <i>{journal}</i>." + if volume: + ref += f" <i>{volume}</i>" + if issue: + ref += f"({issue})" + if pages: + ref += f", pp.{pages}" + ref += "." + if doi: + ref += f' <a href="https://dx.doi.org/{doi}" target="_blank">{doi}</a>' + elif url: + ref += f' <a href="{url}" target="_blank">{url}</a>' + ref += "</p>" + + return etree.fromstring(ref) + + def formatCitation(self, ref): + author_list = list(map(self.formatAuthorSurname, ref.persons["author"])) + year = ref.fields.get("year") + + if len(author_list) == 1: + citation = f"{author_list[0]}" + elif len(author_list) == 2: + citation = f"{author_list[0]} and {author_list[1]}" + else: + citation = f"{author_list[0]} et al." + + citation += f", {year}" + + return citation + + def make_bibliography(self): + if self.order == "alphabetical": + raise (NotImplementedError) + + div = etree.Element("div") + div.set("class", "footnote") + div.append(etree.Element("hr")) + ol = etree.SubElement(div, "ol") + + if not self.citations: + return div + + # table = etree.SubElement(div, "table") + # table.set("class", "references") + # tbody = etree.SubElement(table, "tbody") + etree.SubElement(div, "div") + for id in self.citations: + li = etree.SubElement(ol, "li") + li.set("id", self.referenceID(id)) + # ref_id = etree.SubElement(li, "td") + ref_txt = etree.SubElement(li, "p") + if id in self.references: + self.extension.parser.parseChunk(ref_txt, self.references[id]) + elif id in self.bibsource: + ref_txt.append(self.formatReference(self.bibsource[id])) + else: + ref_txt.text = "Missing citation for {}".format(id) + + return div + + def clear_citations(self): + self.citations = OrderedDict() + + +class CitationsPreprocessor(Preprocessor): + """Gather reference definitions and citation keys""" + + def __init__(self, bibliography): + self.bib = bibliography + + def subsequentIndents(self, lines, i): + """Concatenate consecutive indented lines""" + linesOut = [] + while i < len(lines): + m = INDENT_RE.match(lines[i]) + if m: + linesOut.append(m.group(1)) + i += 1 + else: + break + return " ".join(linesOut), i + + def run(self, lines): + linesOut = [] + i = 0 + + while i < len(lines): + # Check to see if the line starts a reference definition + m = DEF_RE.match(lines[i]) + if m: + key = m.group(1) + reference = m.group(2) + indents, i = self.subsequentIndents(lines, i + 1) + reference += " " + indents + + self.bib.setReference(key, reference) + continue + + # Look for all @citekey patterns inside hard brackets + for bracket in BRACKET_RE.findall(lines[i]): + for c in CITE_RE.findall(bracket): + self.bib.addCitation(c) + linesOut.append(lines[i]) + i += 1 + + return linesOut + + +class CitationsPattern(Pattern): + """Handles converting citations keys into links""" + + def __init__(self, pattern, bibliography): + super(CitationsPattern, self).__init__(pattern) + self.bib = bibliography + + def handleMatch(self, m): + span = etree.Element("span") + for cite_match in CITE_RE.finditer(m.group(2)): + id = cite_match.group(1) + if id in self.bib.bibsource: + a = etree.Element("a") + a.set("id", self.bib.citationID(id)) + a.set("href", "./#" + self.bib.referenceID(id)) + a.set("class", "citation") + a.text = self.bib.labels[id] + span.append(a) + else: + continue + if len(span) == 0: + return None + return span + + +context_citations = None + + +class CitationsExtension(Extension): + def __init__(self): + super(CitationsExtension, self).__init__() + self.bib = None + + def extendMarkdown(self, md): + md.registerExtension(self) + self.parser = md.parser + self.md = md + + md.preprocessors.register(CitationsPreprocessor(self.bib), "mdx_bib", 15) + md.inlinePatterns.register( + CitationsPattern(CITATION_RE, self.bib), "mdx_bib", 175 + ) + + +def makeExtension(*args, **kwargs): + return CitationsExtension(*args, **kwargs) + + +class BibTexPlugin(BasePlugin): + config_scheme: Tuple[Tuple[str, MkType]] = ( + ("bibtex_file", MkType(str)), # type: ignore[assignment] + ("order", MkType(str, default="unsorted")), # type: ignore[assignment] + ) + + def __init__(self): + self.citations = None + + def on_config(self, config, **kwargs): + extension = CitationsExtension() + self.bib = Bibliography( + extension, + self, + self.config["bibtex_file"], + self.config["order"], + ) + extension.bib = self.bib + config["markdown_extensions"].append(extension) + + def on_page_content(self, html, page, config, files): + html += "\n" + etree_to_string(self.bib.make_bibliography()).decode() + self.bib.clear_citations() + return html