|
a |
|
b/docs/scripts/bibtex.py |
|
|
1 |
# Based on https://github.com/darwindarak/mdx_bib |
|
|
2 |
import re |
|
|
3 |
import string |
|
|
4 |
from collections import Counter, OrderedDict |
|
|
5 |
from typing import Tuple |
|
|
6 |
from xml.etree import ElementTree as etree |
|
|
7 |
from xml.etree.ElementTree import tostring as etree_to_string |
|
|
8 |
|
|
|
9 |
from markdown.extensions import Extension |
|
|
10 |
from markdown.inlinepatterns import Pattern |
|
|
11 |
from markdown.preprocessors import Preprocessor |
|
|
12 |
from mkdocs.config.config_options import Type as MkType |
|
|
13 |
from mkdocs.plugins import BasePlugin |
|
|
14 |
from pybtex.database.input import bibtex |
|
|
15 |
from pybtex.exceptions import PybtexError |
|
|
16 |
|
|
|
17 |
# A "[...]" span that contains no nested "[" (group 1 is the bracket content).
BRACKET_RE = re.compile(r"\[([^\[]+)\]")
# An "@citekey" token (group 1 is the key without the "@").
CITE_RE = re.compile(r"@([\w_:-]+)")
# A reference definition line: up to three leading spaces, "[@key]:", then
# the reference text (group 1 is the key, group 2 the text).
DEF_RE = re.compile(r"\A {0,3}\[@([\w_:-]+)\]:\s*(.*)")
# A continuation line indented by one tab or four spaces (group 1 is the
# text after the indent).  The alternation is grouped explicitly: without
# the group the pattern reads as `\A\t` OR ` {4}(.*)`, so a tab-indented
# line matched with group 1 unset (None).
INDENT_RE = re.compile(r"\A(?:\t| {4})(.*)")

# Inline pattern source for a bracketed citation list: "[@a]" or "[@a, @b]".
CITATION_RE = r"(\[@(?:[\w_:-]+)(?: *, *@(?:[\w_:-]+))*\])"
|
|
23 |
|
|
|
24 |
|
|
|
25 |
class Bibliography(object): |
|
|
26 |
"""Keep track of document references and citations for exporting""" |
|
|
27 |
|
|
|
28 |
def __init__(self, extension, plugin, bibtex_file, order): |
|
|
29 |
self.extension = extension |
|
|
30 |
self.order = order |
|
|
31 |
self.plugin = plugin |
|
|
32 |
|
|
|
33 |
self.citations = OrderedDict() |
|
|
34 |
self.references = dict() |
|
|
35 |
|
|
|
36 |
if bibtex_file: |
|
|
37 |
try: |
|
|
38 |
parser = bibtex.Parser() |
|
|
39 |
self.bibsource = parser.parse_file(bibtex_file).entries |
|
|
40 |
self.labels = { |
|
|
41 |
id: self.formatCitation(self.bibsource[id]) |
|
|
42 |
for id in self.bibsource.keys() |
|
|
43 |
} |
|
|
44 |
for value, occurrences in Counter(self.labels.values()).items(): |
|
|
45 |
if occurrences > 1: |
|
|
46 |
for xkey, xvalue in self.labels.items(): |
|
|
47 |
i = 0 |
|
|
48 |
if xvalue == value: |
|
|
49 |
self.labels[ |
|
|
50 |
xkey |
|
|
51 |
] = f"{xvalue}{string.ascii_lowercase[i]}" |
|
|
52 |
i += 1 |
|
|
53 |
|
|
|
54 |
except PybtexError: |
|
|
55 |
print("Error loading bibtex file") |
|
|
56 |
self.bibsource = dict() |
|
|
57 |
self.labels = {} |
|
|
58 |
else: |
|
|
59 |
self.bibsource = dict() |
|
|
60 |
|
|
|
61 |
def addCitation(self, citekey): |
|
|
62 |
self.citations[citekey] = self.citations.get(citekey, 0) + 1 |
|
|
63 |
|
|
|
64 |
def setReference(self, citekey, reference): |
|
|
65 |
self.references[citekey] = reference |
|
|
66 |
|
|
|
67 |
def citationID(self, citekey): |
|
|
68 |
return "cite-" + citekey |
|
|
69 |
|
|
|
70 |
def referenceID(self, citekey): |
|
|
71 |
return "ref-" + citekey |
|
|
72 |
|
|
|
73 |
def formatAuthor(self, author): |
|
|
74 |
out = ( |
|
|
75 |
author.last_names[0] |
|
|
76 |
+ ((" " + author.first_names[0][0]) if author.first_names else "") |
|
|
77 |
+ "." |
|
|
78 |
) |
|
|
79 |
if author.middle_names: |
|
|
80 |
out += f"{author.middle_names[0][0]}." |
|
|
81 |
return out.replace("{", "").replace("}", "") |
|
|
82 |
|
|
|
83 |
def formatAuthorSurname(self, author): |
|
|
84 |
out = author.last_names[0] |
|
|
85 |
return out.replace("{", "").replace("}", "") |
|
|
86 |
|
|
|
87 |
def formatReference(self, ref): |
|
|
88 |
author_list = list(map(self.formatAuthor, ref.persons["author"])) |
|
|
89 |
|
|
|
90 |
if len(author_list) == 1: |
|
|
91 |
authors = author_list[0] |
|
|
92 |
else: |
|
|
93 |
authors = ", ".join(author_list[:-1]) |
|
|
94 |
authors += f" and {author_list[-1]}" |
|
|
95 |
|
|
|
96 |
# Harvard style |
|
|
97 |
# Surname, Initial, ... and Last_Surname, |
|
|
98 |
# Initial, Year. Title. Journal, Volume(Issue), pages. doi. |
|
|
99 |
|
|
|
100 |
title = ref.fields["title"].replace("{", "").replace("}", "") |
|
|
101 |
journal = ref.fields.get("journal", "") |
|
|
102 |
volume = ref.fields.get("volume", "") |
|
|
103 |
issue = ref.fields.get("issue", "") |
|
|
104 |
year = ref.fields.get("year") |
|
|
105 |
pages = ref.fields.get("pages") |
|
|
106 |
doi = ref.fields.get("doi") |
|
|
107 |
url = ref.fields.get("url") |
|
|
108 |
|
|
|
109 |
ref_id = self.referenceID(ref.key) |
|
|
110 |
ref = f"<p id={repr(ref_id)}>{authors}, {year}. {title}." |
|
|
111 |
if journal: |
|
|
112 |
ref += f" <i>{journal}</i>." |
|
|
113 |
if volume: |
|
|
114 |
ref += f" <i>{volume}</i>" |
|
|
115 |
if issue: |
|
|
116 |
ref += f"({issue})" |
|
|
117 |
if pages: |
|
|
118 |
ref += f", pp.{pages}" |
|
|
119 |
ref += "." |
|
|
120 |
if doi: |
|
|
121 |
ref += f' <a href="https://dx.doi.org/{doi}" target="_blank">{doi}</a>' |
|
|
122 |
elif url: |
|
|
123 |
ref += f' <a href="{url}" target="_blank">{url}</a>' |
|
|
124 |
ref += "</p>" |
|
|
125 |
|
|
|
126 |
return etree.fromstring(ref) |
|
|
127 |
|
|
|
128 |
def formatCitation(self, ref): |
|
|
129 |
author_list = list(map(self.formatAuthorSurname, ref.persons["author"])) |
|
|
130 |
year = ref.fields.get("year") |
|
|
131 |
|
|
|
132 |
if len(author_list) == 1: |
|
|
133 |
citation = f"{author_list[0]}" |
|
|
134 |
elif len(author_list) == 2: |
|
|
135 |
citation = f"{author_list[0]} and {author_list[1]}" |
|
|
136 |
else: |
|
|
137 |
citation = f"{author_list[0]} et al." |
|
|
138 |
|
|
|
139 |
citation += f", {year}" |
|
|
140 |
|
|
|
141 |
return citation |
|
|
142 |
|
|
|
143 |
def make_bibliography(self): |
|
|
144 |
if self.order == "alphabetical": |
|
|
145 |
raise (NotImplementedError) |
|
|
146 |
|
|
|
147 |
div = etree.Element("div") |
|
|
148 |
div.set("class", "footnote") |
|
|
149 |
div.append(etree.Element("hr")) |
|
|
150 |
ol = etree.SubElement(div, "ol") |
|
|
151 |
|
|
|
152 |
if not self.citations: |
|
|
153 |
return div |
|
|
154 |
|
|
|
155 |
# table = etree.SubElement(div, "table") |
|
|
156 |
# table.set("class", "references") |
|
|
157 |
# tbody = etree.SubElement(table, "tbody") |
|
|
158 |
etree.SubElement(div, "div") |
|
|
159 |
for id in self.citations: |
|
|
160 |
li = etree.SubElement(ol, "li") |
|
|
161 |
li.set("id", self.referenceID(id)) |
|
|
162 |
# ref_id = etree.SubElement(li, "td") |
|
|
163 |
ref_txt = etree.SubElement(li, "p") |
|
|
164 |
if id in self.references: |
|
|
165 |
self.extension.parser.parseChunk(ref_txt, self.references[id]) |
|
|
166 |
elif id in self.bibsource: |
|
|
167 |
ref_txt.append(self.formatReference(self.bibsource[id])) |
|
|
168 |
else: |
|
|
169 |
ref_txt.text = "Missing citation for {}".format(id) |
|
|
170 |
|
|
|
171 |
return div |
|
|
172 |
|
|
|
173 |
def clear_citations(self): |
|
|
174 |
self.citations = OrderedDict() |
|
|
175 |
|
|
|
176 |
|
|
|
177 |
class CitationsPreprocessor(Preprocessor):
    """Gather reference definitions and citation keys.

    Reference definition lines (and their indented continuation lines) are
    recorded on the bibliography and removed from the document; every
    ``@citekey`` found inside square brackets on the remaining lines is
    counted as a citation.
    """

    def __init__(self, bibliography):
        """Store the bibliography object that receives keys and references."""
        super().__init__()
        self.bib = bibliography

    def subsequentIndents(self, lines, i):
        """Concatenate consecutive indented lines.

        Args:
            lines: all lines of the document.
            i: index of the first candidate continuation line.

        Returns:
            A tuple ``(text, next_index)``: the continuation texts joined by
            single spaces, and the index of the first line that is not an
            indented continuation.
        """
        collected = []
        while i < len(lines):
            m = INDENT_RE.match(lines[i])
            if not m:
                break
            text = m.group(1)
            if text is None:
                # The match consumed only the indent (tab branch with no
                # captured group); take the rest of the line instead of
                # passing None to str.join, which raises TypeError.
                text = lines[i][m.end():]
            collected.append(text)
            i += 1
        return " ".join(collected), i

    def run(self, lines):
        """Record definitions and citations; return the lines to keep.

        Args:
            lines: the document as a list of lines.

        Returns:
            The input lines without reference definitions and their
            continuation lines.
        """
        kept = []
        i = 0

        while i < len(lines):
            # Check to see if the line starts a reference definition
            m = DEF_RE.match(lines[i])
            if m:
                key = m.group(1)
                reference = m.group(2)
                indents, i = self.subsequentIndents(lines, i + 1)
                reference += " " + indents

                self.bib.setReference(key, reference)
                continue

            # Look for all @citekey patterns inside hard brackets
            for bracket in BRACKET_RE.findall(lines[i]):
                for citekey in CITE_RE.findall(bracket):
                    self.bib.addCitation(citekey)
            kept.append(lines[i])
            i += 1

        return kept
|
|
219 |
|
|
|
220 |
|
|
|
221 |
class CitationsPattern(Pattern):
    """Handles converting citations keys into links."""

    def __init__(self, pattern, bibliography):
        """Compile *pattern* and keep the bibliography used for lookups."""
        super().__init__(pattern)
        self.bib = bibliography

    def handleMatch(self, m):
        """Return a ``<span>`` of citation links for the matched bracket.

        Every ``@citekey`` in group 2 of the match that is present in the
        bibliography source becomes an ``<a class="citation">`` pointing at
        its bibliography entry; keys that are not present are skipped.

        Returns:
            The ``<span>`` element, or ``None`` when no key was recognised.
        """
        span = etree.Element("span")
        for cite_match in CITE_RE.finditer(m.group(2)):
            citekey = cite_match.group(1)
            if citekey not in self.bib.bibsource:
                continue

            # ``labels`` is a plain dict, while ``bibsource`` may accept keys
            # it does not hold verbatim (presumably a case-insensitive entry
            # mapping -- confirm against pybtex); fall back to an unsuffixed
            # label instead of raising KeyError.
            label = self.bib.labels.get(citekey)
            if label is None:
                label = self.bib.formatCitation(self.bib.bibsource[citekey])

            a = etree.Element("a")
            a.set("id", self.bib.citationID(citekey))
            a.set("href", "./#" + self.bib.referenceID(citekey))
            a.set("class", "citation")
            a.text = label
            span.append(a)

        if len(span) == 0:
            return None
        return span
|
|
244 |
|
|
|
245 |
|
|
|
246 |
# Module-level placeholder; nothing in the visible part of this file reads
# or rebinds it.
context_citations = None
|
|
247 |
|
|
|
248 |
|
|
|
249 |
class CitationsExtension(Extension):
    """Markdown extension that wires citation handling into a parser.

    ``bib`` must be assigned before ``extendMarkdown`` runs; it is handed to
    both the preprocessor and the inline pattern registered there.
    """

    def __init__(self):
        """Create the extension with no bibliography attached yet."""
        super().__init__()
        self.bib = None

    def extendMarkdown(self, md):
        """Register the citation preprocessor and inline pattern on *md*.

        Also keeps references to *md* and its block parser on the instance.
        """
        md.registerExtension(self)
        self.parser = md.parser
        self.md = md

        preprocessor = CitationsPreprocessor(self.bib)
        md.preprocessors.register(preprocessor, "mdx_bib", 15)

        inline_pattern = CitationsPattern(CITATION_RE, self.bib)
        md.inlinePatterns.register(inline_pattern, "mdx_bib", 175)
|
|
263 |
|
|
|
264 |
|
|
|
265 |
def makeExtension(*args, **kwargs):
    """Build a ``CitationsExtension``, forwarding all arguments to it."""
    extension = CitationsExtension(*args, **kwargs)
    return extension
|
|
267 |
|
|
|
268 |
|
|
|
269 |
class BibTexPlugin(BasePlugin):
    """MkDocs plugin that appends a bibliography to every rendered page.

    Configuration options:
        bibtex_file: path of the BibTeX file to load.
        order: ordering of the bibliography (defaults to "unsorted").
    """

    config_scheme: Tuple[Tuple[str, MkType]] = (
        ("bibtex_file", MkType(str)),  # type: ignore[assignment]
        ("order", MkType(str, default="unsorted")),  # type: ignore[assignment]
    )

    def __init__(self):
        self.citations = None
        # Created in on_config; defined here so the attribute always exists.
        self.bib = None

    def on_config(self, config, **kwargs):
        """Create the bibliography and register the citation extension.

        Returns:
            The same *config* object, with the extension appended to its
            ``markdown_extensions`` list.
        """
        extension = CitationsExtension()
        self.bib = Bibliography(
            extension,
            self,
            self.config["bibtex_file"],
            self.config["order"],
        )
        extension.bib = self.bib
        config["markdown_extensions"].append(extension)
        return config

    def on_page_content(self, html, page, config, files):
        """Append the page's bibliography to *html* and reset the citations.

        Returns:
            The page HTML followed by a newline and the bibliography markup.
        """
        # Serialise with the HTML method: the XML default writes empty
        # elements as self-closing tags ("<div />", "<ol />"), which HTML
        # parsers read as opening tags that are never closed.
        bibliography = etree_to_string(
            self.bib.make_bibliography(), encoding="unicode", method="html"
        )
        self.bib.clear_citations()
        return html + "\n" + bibliography