|
a |
|
b/tests/extract_docs_code.py |
|
|
1 |
import re |
|
|
2 |
import shutil |
|
|
3 |
import tempfile |
|
|
4 |
from textwrap import dedent |
|
|
5 |
from typing import Tuple |
|
|
6 |
|
|
|
7 |
from markdown.extensions import Extension |
|
|
8 |
from markdown.extensions.attr_list import get_attrs |
|
|
9 |
from markdown.extensions.codehilite import parse_hl_lines |
|
|
10 |
from markdown.extensions.fenced_code import FencedBlockPreprocessor |
|
|
11 |
from mkdocs.commands.build import build |
|
|
12 |
from mkdocs.config import load_config |
|
|
13 |
from mkdocs.config.config_options import Type as MkType |
|
|
14 |
from mkdocs.config.defaults import MkDocsConfig |
|
|
15 |
from mkdocs.plugins import BasePlugin |
|
|
16 |
from mkdocstrings.extension import AutoDocProcessor |
|
|
17 |
from mkdocstrings.plugin import MkdocstringsPlugin |
|
|
18 |
|
|
|
19 |
# NOTE(review): none of the patterns below is referenced by the code visible
# in this file; they look like leftovers -- confirm before removing.

# A "[...]" span whose content contains no further "[".
BRACKET_RE = re.compile(r"\[([^\[]+)\]")

# An "@key" token; the key (word chars, "_", ":", "-") is captured.
CITE_RE = re.compile(r"@([\w_:-]+)")

# A "[@key]: text" line indented by at most three spaces; captures key and text.
DEF_RE = re.compile(r"\A {0,3}\[@([\w_:-]+)\]:\s*(.*)")

# Either a tab at the very start, or four spaces followed by captured text.
INDENT_RE = re.compile(r"\A\t| {4}(.*)")

# Uncompiled pattern: "[@key]" or a comma-separated "[@key, @other]" group.
CITATION_RE = r"(\[@(?:[\w_:-]+)(?: *, *@(?:[\w_:-]+))*\])"
|
|
25 |
|
|
|
26 |
|
|
|
27 |
class PyCodePreprocessor(FencedBlockPreprocessor):
    """Collect the source of fenced ``python`` code blocks.

    Each fenced block whose language resolves to ``python`` and whose class
    list does not contain ``no-check`` is dedented and appended to the
    ``code_blocks`` list given at construction time. The Markdown lines
    passed to :meth:`run` are returned unchanged.
    """

    # Fence pattern; the opening fence may be preceded by spaces, and the
    # closing fence must repeat the opening one exactly (``(?P=fence)``).
    FENCED_BLOCK_RE = re.compile(
        dedent(
            r"""
            (?P<fence>^[ ]*(?:~{3,}|`{3,}))[ ]*                      # opening fence
            ((\{(?P<attrs>[^\}\n]*)\})|                              # (optional {attrs} or
            (\.?(?P<lang>[\w#.+-]*)[ ]*)?                            # optional (.)lang
            (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot)[ ]*)?) # optional hl_lines)
            \n                                                       # newline (end of opening fence)
            (?P<code>.*?)(?<=\n)                                     # the code block
            (?P=fence)[ ]*$                                          # closing fence
            """  # noqa: E501
        ),
        re.MULTILINE | re.DOTALL | re.VERBOSE,
    )

    def __init__(self, md, code_blocks):
        """Store the shared ``code_blocks`` list that :meth:`run` appends to.

        The parent preprocessor is initialised with an empty config dict.
        """
        super().__init__(md, {})
        self.code_blocks = code_blocks

    def run(self, lines):
        """Scan ``lines`` for fenced blocks and record the python ones.

        Args:
            lines: the Markdown source, one string per line.

        Returns:
            ``lines`` itself, unmodified; the only effect is appending to
            ``self.code_blocks``.
        """
        text = "\n".join(lines)
        while True:
            # ---- https://github.com/Python-Markdown/markdown/blob/5a2fee/markdown/extensions/fenced_code.py#L84C9-L98 # noqa: E501
            m = self.FENCED_BLOCK_RE.search(text)
            if m is None:
                break

            lang, classes, config = None, [], {}
            if m.group("attrs"):
                # The element id returned by handle_attrs is not needed here.
                _id, classes, config = self.handle_attrs(get_attrs(m.group("attrs")))
                if classes:
                    # With {attrs} syntax the first class is the language.
                    lang = classes.pop(0)
            else:
                if m.group("lang"):
                    lang = m.group("lang")
                if m.group("hl_lines"):
                    # Support `hl_lines` outside of `attrs` for
                    # backward-compatibility
                    config["hl_lines"] = parse_hl_lines(m.group("hl_lines"))
            # ----

            if lang == "python" and "no-check" not in classes:
                self.code_blocks.append(dedent(m.group("code")))

            # Continue searching right after the closing fence just consumed.
            text = text[m.end() :]

        return lines
|
|
77 |
|
|
|
78 |
|
|
|
79 |
# NOTE(review): never read or written by the code visible in this file;
# looks like a leftover placeholder -- confirm before removing.
context_citations = None
|
|
80 |
|
|
|
81 |
|
|
|
82 |
class PyCodeExtension(Extension):
    """Markdown extension that installs a :class:`PyCodePreprocessor`."""

    def __init__(self, code_blocks):
        """Keep a reference to the list the preprocessor will append to."""
        super().__init__()
        self.code_blocks = code_blocks

    def extendMarkdown(self, md):
        """Register this extension and its preprocessor on ``md``.

        The preprocessor is registered under the name ``"fenced_code"`` with
        priority 31.
        """
        self.md = md
        md.registerExtension(self)

        preprocessor = PyCodePreprocessor(md, self.code_blocks)
        md.preprocessors.register(preprocessor, "fenced_code", 31)

        # Add this extension to the "mdx" list of every registered
        # AutoDocProcessor -- presumably so Markdown rendered through
        # mkdocstrings is scanned as well (confirm against mkdocstrings).
        for registered in md.registeredExtensions:
            if not isinstance(registered, AutoDocProcessor):
                continue
            registered._config["mdx"].append(self)
|
|
96 |
|
|
|
97 |
|
|
|
98 |
def makeExtension(*args, **kwargs):
    """Build a :class:`PyCodeExtension`, forwarding all arguments to it."""
    extension = PyCodeExtension(*args, **kwargs)
    return extension
|
|
100 |
|
|
|
101 |
|
|
|
102 |
class PyCodeExtractorPlugin(BasePlugin):
    """MkDocs plugin that gathers, page by page, the captured python code.

    ``page_code_blocks`` is the scratch list handed to
    :class:`PyCodeExtension`; ``docs_code_blocks`` accumulates one
    ``(page.url, joined_code)`` tuple per page that produced any code.
    """

    # No plugin options are declared.
    config_scheme: Tuple[Tuple[str, MkType]] = ()

    def __init__(self, global_config):
        """Remember ``global_config`` and start with empty collections."""
        self.global_config = global_config
        self.page_code_blocks = []
        self.docs_code_blocks = []

    def on_config(self, config: MkDocsConfig):
        """Swap the stock fenced-code handling for :class:`PyCodeExtension`."""
        self.ext = PyCodeExtension(self.page_code_blocks)
        extensions = config["markdown_extensions"]
        # After pymdownx.highlight, because of weird registering deleting the
        # first extension
        extensions.append(self.ext)
        # NOTE(review): `remove` raises if the name is absent -- confirm that
        # both extensions are always present in the loaded configuration.
        extensions.remove("pymdownx.highlight")
        extensions.remove("fenced_code")

    def on_pre_build(self, *, config: MkDocsConfig):
        """Request the mkdocstrings ``python`` handler; the result is unused."""
        mkdocstrings_plugin: MkdocstringsPlugin = config.plugins["mkdocstrings"]
        mkdocstrings_plugin.get_handler("python")

    def on_page_content(self, html, page, config, files):
        """Move the code captured for ``page`` into ``docs_code_blocks``.

        Returns:
            ``html`` unchanged.
        """
        if self.page_code_blocks:
            page_code = "\n".join(self.page_code_blocks)
            self.docs_code_blocks.append((page.url, page_code))
            # Reset the scratch list in place; the extension shares it.
            self.page_code_blocks.clear()
        return html
|
|
130 |
|
|
|
131 |
|
|
|
132 |
def extract_docs_code():
    """Build the docs into a temporary directory and return the code found.

    Returns:
        A dict mapping page URL to the python code collected on that page.
        Pages whose URL contains ``"changelog"`` are skipped. When several
        pages yield identical code, only the last of them is kept.
    """
    config = load_config()

    temp_dir = tempfile.mkdtemp()
    try:
        config["site_dir"] = temp_dir

        # plug the pycode extractor plugin
        plugin = PyCodeExtractorPlugin(config)
        config.plugins["pycode_extractor"] = plugin

        config["plugins"].run_event("startup", command="build", dirty=False)
        try:
            build(config)
        finally:
            config["plugins"].run_event("shutdown")
    finally:
        # The built site is discarded; only the captured code is wanted.
        shutil.rmtree(temp_dir, ignore_errors=True)

    # Deduplicate on code first (a later page replaces an earlier one with
    # the same code), then invert so that URLs become unique keys.
    url_by_code = {}
    for url, code in plugin.docs_code_blocks:
        if "changelog" in url:
            continue
        url_by_code[code] = url

    return {url: code for code, url in url_by_code.items()}