Diff of /docs/scripts/bibtex.py [000000] .. [cad161]

Switch to unified view

a b/docs/scripts/bibtex.py
1
# Based on https://github.com/darwindarak/mdx_bib
2
import re
3
import string
4
from collections import Counter, OrderedDict
5
from typing import Tuple
6
from xml.etree import ElementTree as etree
7
from xml.etree.ElementTree import tostring as etree_to_string
8
9
from markdown.extensions import Extension
10
from markdown.inlinepatterns import Pattern
11
from markdown.preprocessors import Preprocessor
12
from mkdocs.config.config_options import Type as MkType
13
from mkdocs.plugins import BasePlugin
14
from pybtex.database.input import bibtex
15
from pybtex.exceptions import PybtexError
16
17
# Text enclosed in one pair of hard brackets, e.g. "[@a, @b]" -> "@a, @b".
BRACKET_RE = re.compile(r"\[([^\[]+)\]")
# A single "@citekey" token; group 1 is the key without the "@".
CITE_RE = re.compile(r"@([\w_:-]+)")
# A reference definition line "[@citekey]: text" with at most three leading
# spaces; group 1 is the key, group 2 the text on the same line.
DEF_RE = re.compile(r"\A {0,3}\[@([\w_:-]+)\]:\s*(.*)")
# A continuation line indented by one tab or four spaces; group 1 is the
# text after the indent.  The alternation is grouped explicitly: without the
# group the pattern reads as `(\A\t)` OR `( {4}(.*))`, so tab-indented lines
# matched with group 1 unset and their content was dropped.
INDENT_RE = re.compile(r"\A(?:\t| {4})(.*)")

# Inline citation such as "[@key]" or "[@key1, @key2]" (kept as a string
# because it is handed to the Markdown pattern class, which compiles it).
CITATION_RE = r"(\[@(?:[\w_:-]+)(?: *, *@(?:[\w_:-]+))*\])"
23
24
25
class Bibliography(object):
    """Keep track of document references and citations for exporting.

    Attributes:
        citations: ordered mapping of citekey -> number of times it was cited
            since the last ``clear_citations()``.
        references: mapping of citekey -> reference text stored through
            ``setReference()``.
        bibsource: mapping of citekey -> parsed BibTeX entry (empty when no
            file was given or the file could not be loaded).
        labels: mapping of citekey -> in-text citation label, e.g.
            "Smith et al., 2020"; identical labels get "a", "b", ... suffixes.
    """

    def __init__(self, extension, plugin, bibtex_file, order):
        """Load ``bibtex_file`` (if given) and pre-compute citation labels.

        Args:
            extension: object whose ``parser`` attribute is used to render
                stored reference text in ``make_bibliography()``.
            plugin: stored as ``self.plugin``; not used by this class.
            bibtex_file: path of the BibTeX file, or a falsy value for none.
            order: ordering mode; ``"alphabetical"`` is not implemented.
        """
        self.extension = extension
        self.order = order
        self.plugin = plugin

        self.citations = OrderedDict()
        self.references = dict()

        # Defaults first, so both attributes always exist -- previously
        # ``labels`` was left undefined when no BibTeX file was configured.
        self.bibsource = dict()
        self.labels = {}

        if bibtex_file:
            try:
                parser = bibtex.Parser()
                self.bibsource = parser.parse_file(bibtex_file).entries
                self.labels = self._disambiguate_labels(
                    {
                        key: self.formatCitation(entry)
                        for key, entry in self.bibsource.items()
                    }
                )
            except PybtexError:
                print("Error loading bibtex file")
                self.bibsource = dict()
                self.labels = {}

    @staticmethod
    def _alpha_suffix(index):
        """Return the ``index``-th suffix of the series a, b, ..., z, aa, ab, ..."""
        letters = string.ascii_lowercase
        suffix = ""
        remaining = index + 1
        while remaining:
            remaining, offset = divmod(remaining - 1, len(letters))
            suffix = letters[offset] + suffix
        return suffix

    @staticmethod
    def _disambiguate_labels(labels):
        """Return a copy of ``labels`` in which duplicated labels are suffixed.

        Labels that occur once are kept unchanged.  Labels shared by several
        keys receive "a", "b", ... in the iteration order of ``labels``.  The
        counter is kept per label; it used to be reset on every iteration,
        which gave every duplicate the same "a" suffix.
        """
        occurrences = Counter(labels.values())
        next_index = Counter()
        result = {}
        for key, label in labels.items():
            if occurrences[label] > 1:
                suffix = Bibliography._alpha_suffix(next_index[label])
                next_index[label] += 1
                result[key] = f"{label}{suffix}"
            else:
                result[key] = label
        return result

    @staticmethod
    def _append_text(parent, text):
        """Append ``text`` after the current content of ``parent``.

        In ElementTree, text following a child element lives in that child's
        ``tail``; text before the first child lives in the parent's ``text``.
        """
        if len(parent):
            last_child = parent[-1]
            last_child.tail = (last_child.tail or "") + text
        else:
            parent.text = (parent.text or "") + text

    def addCitation(self, citekey):
        """Record one more use of ``citekey``."""
        self.citations[citekey] = self.citations.get(citekey, 0) + 1

    def setReference(self, citekey, reference):
        """Store the reference text to use for ``citekey``."""
        self.references[citekey] = reference

    def citationID(self, citekey):
        """Return the HTML id used for the in-text citation of ``citekey``."""
        return "cite-" + citekey

    def referenceID(self, citekey):
        """Return the HTML id used for the bibliography entry of ``citekey``."""
        return "ref-" + citekey

    def formatAuthor(self, author):
        """Format one author as "Surname F." (plus "M." for a middle name).

        Only the first surname and the first letters of the first given and
        first middle name are used; BibTeX braces are stripped.
        """
        out = author.last_names[0]
        if author.first_names:
            out += " " + author.first_names[0][0]
        out += "."
        if author.middle_names:
            out += f"{author.middle_names[0][0]}."
        return out.replace("{", "").replace("}", "")

    def formatAuthorSurname(self, author):
        """Return the author's first surname with BibTeX braces stripped."""
        out = author.last_names[0]
        return out.replace("{", "").replace("}", "")

    def formatReference(self, ref):
        """Build the ``<p>`` element for one bibliography entry.

        Harvard style:
        Surname, Initial, ... and Last_Surname, Initial, Year. Title.
        Journal, Volume(Issue), pages. doi.

        The element is assembled node by node rather than parsed from an
        HTML string, so characters such as ``&`` or ``<`` in the title,
        journal or URL are escaped on serialisation instead of raising a
        parse error.

        Args:
            ref: entry exposing ``key``, ``persons`` and ``fields``.

        Returns:
            An ElementTree ``<p>`` element whose id is ``referenceID(ref.key)``.
        """
        author_list = [
            self.formatAuthor(person) for person in ref.persons.get("author", [])
        ]

        if not author_list:
            # Entries without an author field (e.g. editor-only) used to crash.
            authors = "Anon."
        elif len(author_list) == 1:
            authors = author_list[0]
        else:
            authors = ", ".join(author_list[:-1])
            authors += f" and {author_list[-1]}"

        title = ref.fields["title"].replace("{", "").replace("}", "")
        journal = ref.fields.get("journal", "")
        volume = ref.fields.get("volume", "")
        issue = ref.fields.get("issue", "")
        year = ref.fields.get("year")
        pages = ref.fields.get("pages")
        doi = ref.fields.get("doi")
        url = ref.fields.get("url")

        para = etree.Element("p")
        para.set("id", self.referenceID(ref.key))
        para.text = f"{authors}, {year}. {title}."

        if journal:
            self._append_text(para, " ")
            etree.SubElement(para, "i").text = journal
            self._append_text(para, ".")
            if volume:
                self._append_text(para, " ")
                etree.SubElement(para, "i").text = volume
            if issue:
                self._append_text(para, f"({issue})")
            if pages:
                self._append_text(para, f", pp.{pages}")
            self._append_text(para, ".")

        # A DOI takes precedence over a plain URL.
        if doi:
            href, link_text = f"https://dx.doi.org/{doi}", doi
        elif url:
            href, link_text = url, url
        else:
            href, link_text = None, None

        if href:
            self._append_text(para, " ")
            link = etree.SubElement(para, "a")
            link.set("href", href)
            link.set("target", "_blank")
            link.text = link_text

        return para

    def formatCitation(self, ref):
        """Return the in-text label for ``ref``.

        One author gives "Surname, Year", two give "A and B, Year", more give
        "A et al., Year"; an entry without authors gives "Anon., Year".
        """
        surnames = [
            self.formatAuthorSurname(person)
            for person in ref.persons.get("author", [])
        ]
        year = ref.fields.get("year")

        if not surnames:
            citation = "Anon."
        elif len(surnames) == 1:
            citation = f"{surnames[0]}"
        elif len(surnames) == 2:
            citation = f"{surnames[0]} and {surnames[1]}"
        else:
            citation = f"{surnames[0]} et al."

        citation += f", {year}"

        return citation

    def make_bibliography(self):
        """Build the bibliography ``<div>`` for the citations recorded so far.

        Returns:
            A ``<div class="footnote">`` holding an ``<hr>`` and an ``<ol>``
            with one ``<li>`` per cited key, in first-citation order.  With
            no citations the list is empty.

        Raises:
            NotImplementedError: if ``order`` is ``"alphabetical"``.
        """
        if self.order == "alphabetical":
            raise NotImplementedError(
                "alphabetical bibliography order is not implemented"
            )

        div = etree.Element("div")
        div.set("class", "footnote")
        div.append(etree.Element("hr"))
        ol = etree.SubElement(div, "ol")

        if not self.citations:
            return div

        etree.SubElement(div, "div")
        for citekey in self.citations:
            li = etree.SubElement(ol, "li")
            li.set("id", self.referenceID(citekey))
            ref_txt = etree.SubElement(li, "p")
            if citekey in self.references:
                # Stored reference text wins over the BibTeX entry.
                self.extension.parser.parseChunk(ref_txt, self.references[citekey])
            elif citekey in self.bibsource:
                # NOTE(review): the element from formatReference() carries the
                # same id as the enclosing <li> and nests a <p> in a <p>;
                # kept as-is so the emitted markup does not change.
                ref_txt.append(self.formatReference(self.bibsource[citekey]))
            else:
                ref_txt.text = "Missing citation for {}".format(citekey)

        return div

    def clear_citations(self):
        """Forget all recorded citations (stored references are kept)."""
        self.citations = OrderedDict()
175
176
177
class CitationsPreprocessor(Preprocessor):
    """Gather reference definitions and citation keys.

    Reference definition lines (and their indented continuation lines) are
    removed from the document and stored on the bibliography; every
    ``@citekey`` found inside hard brackets is recorded as a citation.
    """

    def __init__(self, bibliography):
        """Keep the bibliography object that collects keys and references."""
        # Initialise the base processor as well so its attributes exist.
        super().__init__()
        self.bib = bibliography

    def subsequentIndents(self, lines, i):
        """Concatenate consecutive indented lines.

        Args:
            lines: all document lines.
            i: index of the first candidate continuation line.

        Returns:
            A tuple ``(text, next_index)``: the de-indented lines joined by
            single spaces, and the index of the first line that was not
            consumed.
        """
        collected = []
        while i < len(lines):
            m = INDENT_RE.match(lines[i])
            if not m:
                break
            text = m.group(1)
            if text is None:
                # The indent matched without capturing the rest of the line
                # (tab-indented lines); take everything after the match so
                # the join below never sees None and the text is not lost.
                text = lines[i][m.end():]
            collected.append(text)
            i += 1
        return " ".join(collected), i

    def run(self, lines):
        """Strip reference definitions and record citations.

        Args:
            lines: the document as a list of lines.

        Returns:
            The lines with reference definitions (and their continuation
            lines) removed; all other lines are passed through unchanged.
        """
        linesOut = []
        i = 0

        while i < len(lines):
            # Check to see if the line starts a reference definition
            m = DEF_RE.match(lines[i])
            if m:
                key = m.group(1)
                reference = m.group(2)
                # subsequentIndents() also advances i past the consumed lines.
                indents, i = self.subsequentIndents(lines, i + 1)
                reference += " " + indents

                self.bib.setReference(key, reference)
                continue

            # Look for all @citekey patterns inside hard brackets
            for bracket in BRACKET_RE.findall(lines[i]):
                for c in CITE_RE.findall(bracket):
                    self.bib.addCitation(c)
            linesOut.append(lines[i])
            i += 1

        return linesOut
219
220
221
class CitationsPattern(Pattern):
    """Handles converting citations keys into links"""

    def __init__(self, pattern, bibliography):
        """Compile ``pattern`` via the base class and keep the bibliography."""
        super().__init__(pattern)
        self.bib = bibliography

    def handleMatch(self, m):
        """Turn one bracketed citation into a ``<span>`` of links.

        Every ``@citekey`` in the match that exists in the bibliography's
        BibTeX source becomes an ``<a class="citation">`` pointing at its
        bibliography entry and labelled with the pre-computed citation
        label.  Keys that are not in the BibTeX source are skipped.

        Args:
            m: the regex match; group 2 is read as the bracketed citation
                text.

        Returns:
            The ``<span>`` element, or ``None`` when no key was recognised.
        """
        span = etree.Element("span")
        for cite_match in CITE_RE.finditer(m.group(2)):
            citekey = cite_match.group(1)
            if citekey not in self.bib.bibsource:
                continue
            if len(span):
                # Separate consecutive links; without a tail the labels of
                # "[@a, @b]" were rendered directly against each other.
                span[-1].tail = "; "
            link = etree.SubElement(span, "a")
            link.set("id", self.bib.citationID(citekey))
            link.set("href", "./#" + self.bib.referenceID(citekey))
            link.set("class", "citation")
            link.text = self.bib.labels[citekey]
        if len(span) == 0:
            return None
        return span
244
245
246
# Module-level placeholder, initialised to None.
# NOTE(review): nothing in the visible part of this file reads or assigns
# this name -- confirm whether it is still needed.
context_citations = None
247
248
249
class CitationsExtension(Extension):
    """Markdown extension that wires the citation processors into a parser.

    ``bib`` starts as ``None`` and must be assigned before
    ``extendMarkdown()`` runs, because both processors receive it there.
    """

    def __init__(self):
        super().__init__()
        self.bib = None

    def extendMarkdown(self, md):
        """Register this extension and its two processors on ``md``.

        Also keeps ``md`` and ``md.parser`` on the instance as ``self.md``
        and ``self.parser``.
        """
        self.md = md
        self.parser = md.parser
        md.registerExtension(self)

        preprocessor = CitationsPreprocessor(self.bib)
        md.preprocessors.register(preprocessor, "mdx_bib", 15)

        inline_pattern = CitationsPattern(CITATION_RE, self.bib)
        md.inlinePatterns.register(inline_pattern, "mdx_bib", 175)
263
264
265
def makeExtension(*args, **kwargs):
    """Create a ``CitationsExtension``, forwarding all arguments to it."""
    extension = CitationsExtension(*args, **kwargs)
    return extension
267
268
269
class BibTexPlugin(BasePlugin):
    """MkDocs plugin that appends a bibliography to every rendered page.

    Options:
        bibtex_file: string option passed to ``Bibliography`` as the BibTeX
            file.
        order: string option passed to ``Bibliography``; defaults to
            "unsorted".
    """

    config_scheme: Tuple[Tuple[str, MkType]] = (
        ("bibtex_file", MkType(str)),  # type: ignore[assignment]
        ("order", MkType(str, default="unsorted")),  # type: ignore[assignment]
    )

    def __init__(self):
        self.citations = None

    def on_config(self, config, **kwargs):
        """Create the bibliography and add the citation extension to ``config``."""
        extension = CitationsExtension()
        bibliography = Bibliography(
            extension,
            self,
            self.config["bibtex_file"],
            self.config["order"],
        )
        # The plugin and the extension share the same bibliography object.
        self.bib = bibliography
        extension.bib = bibliography
        config["markdown_extensions"].append(extension)

    def on_page_content(self, html, page, config, files):
        """Return ``html`` followed by the serialised bibliography.

        The recorded citations are cleared afterwards, so the next call
        starts from an empty set.
        """
        rendered = etree_to_string(self.bib.make_bibliography()).decode()
        self.bib.clear_citations()
        return html + "\n" + rendered