a b/tests/extract_docs_code.py
1
import re
2
import shutil
3
import tempfile
4
from textwrap import dedent
5
from typing import Tuple
6
7
from markdown.extensions import Extension
8
from markdown.extensions.attr_list import get_attrs
9
from markdown.extensions.codehilite import parse_hl_lines
10
from markdown.extensions.fenced_code import FencedBlockPreprocessor
11
from mkdocs.commands.build import build
12
from mkdocs.config import load_config
13
from mkdocs.config.config_options import Type as MkType
14
from mkdocs.config.defaults import MkDocsConfig
15
from mkdocs.plugins import BasePlugin
16
from mkdocstrings.extension import AutoDocProcessor
17
from mkdocstrings.plugin import MkdocstringsPlugin
18
19
# Citation-related patterns.
# NOTE(review): none of these is referenced in the visible part of this file;
# confirm whether they are still needed.

# Uncompiled pattern text: a bracketed, comma-separated list of ``@key`` items.
CITATION_RE = r"(\[@(?:[\w_:-]+)(?: *, *@(?:[\w_:-]+))*\])"

# Text enclosed in square brackets (captures the inner text).
BRACKET_RE = re.compile(r"\[([^\[]+)\]")

# A single ``@key`` (captures the key).
CITE_RE = re.compile(r"@([\w_:-]+)")

# ``[@key]: text`` at the start of the string, after at most three spaces
# (captures the key and the text).
DEF_RE = re.compile(r"\A {0,3}\[@([\w_:-]+)\]:\s*(.*)")

# Either a tab at the very start of the string, or four spaces followed by a
# captured remainder. The ``|`` splits the whole pattern, so ``\A`` anchors only
# the tab branch and the capture group exists only on the four-space branch.
INDENT_RE = re.compile(r"\A\t| {4}(.*)")
25
26
27
class PyCodePreprocessor(FencedBlockPreprocessor):
    """Collect the Python fenced code blocks found in a Markdown document.

    Every fenced block whose language is ``python`` and that does not carry the
    ``no-check`` class is dedented and appended to ``code_blocks``. The
    document lines themselves are returned unchanged.
    """

    # The opening fence may be preceded by spaces (``^[ ]*``); since those spaces
    # are part of the ``fence`` group, the closing fence must repeat them.
    FENCED_BLOCK_RE = re.compile(
        dedent(
            r"""
            (?P<fence>^[ ]*(?:~{3,}|`{3,}))[ ]*                          # opening fence
            ((\{(?P<attrs>[^\}\n]*)\})|                              # (optional {attrs} or
            (\.?(?P<lang>[\w#.+-]*)[ ]*)?                            # optional (.)lang
            (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot)[ ]*)?) # optional hl_lines)
            \n                                                       # newline (end of opening fence)
            (?P<code>.*?)(?<=\n)                                     # the code block
            (?P=fence)[ ]*$                                          # closing fence
        """  # noqa: E501
        ),
        re.MULTILINE | re.DOTALL | re.VERBOSE,
    )

    def __init__(self, md, code_blocks):
        """Initialise the preprocessor.

        Args:
            md: The Markdown instance this preprocessor is attached to.
            code_blocks: List that receives the source of each collected block;
                it is mutated in place by :meth:`run`.
        """
        super().__init__(md, {})
        self.code_blocks = code_blocks

    def run(self, lines):
        """Record the Python code blocks in ``lines`` and return ``lines`` as is.

        Args:
            lines: The lines of the document being processed.

        Returns:
            The same ``lines`` object, unmodified.
        """
        text = "\n".join(lines)
        # A single left-to-right scan; matches never overlap, so this visits the
        # same blocks as repeatedly searching the remainder of the text.
        for match in self.FENCED_BLOCK_RE.finditer(text):
            # Language detection adapted from
            # https://github.com/Python-Markdown/markdown/blob/5a2fee/markdown/extensions/fenced_code.py#L84C9-L98  # noqa: E501
            lang, classes = None, []
            if match.group("attrs"):
                _, classes, _ = self.handle_attrs(get_attrs(match.group("attrs")))
                if classes:
                    # With ``{...}`` attributes the first class is the language.
                    lang = classes.pop(0)
            elif match.group("lang"):
                lang = match.group("lang")

            if lang == "python" and "no-check" not in classes:
                self.code_blocks.append(dedent(match.group("code")))

        return lines
77
78
79
# NOTE(review): not referenced anywhere else in the visible part of this file;
# looks like a leftover — confirm before removing.
context_citations = None
80
81
82
class PyCodeExtension(Extension):
    """Markdown extension that installs a :class:`PyCodePreprocessor`."""

    def __init__(self, code_blocks):
        """Keep a reference to the list that will receive the code blocks."""
        super().__init__()
        self.code_blocks = code_blocks

    def extendMarkdown(self, md):
        """Attach the code-collecting preprocessor to ``md``.

        The preprocessor is registered under the name ``"fenced_code"`` with
        priority 31. This extension is also appended to the ``mdx`` config list
        of every registered ``AutoDocProcessor`` (presumably so that Markdown
        rendered by mkdocstrings goes through it too — confirm).
        """
        self.md = md
        md.registerExtension(self)

        preprocessor = PyCodePreprocessor(md, self.code_blocks)
        md.preprocessors.register(preprocessor, "fenced_code", 31)

        for registered in md.registeredExtensions:
            if not isinstance(registered, AutoDocProcessor):
                continue
            registered._config["mdx"].append(self)
96
97
98
def makeExtension(*args, **kwargs):
    """Build a :class:`PyCodeExtension`, forwarding every argument to it."""
    extension = PyCodeExtension(*args, **kwargs)
    return extension
100
101
102
class PyCodeExtractorPlugin(BasePlugin):
    """MkDocs plugin that records the Python code blocks of each built page.

    After a build, ``docs_code_blocks`` holds one ``(page url, code)`` pair per
    page that contained at least one collected block, where ``code`` is the
    page's blocks joined with newlines.
    """

    # This plugin exposes no configuration options.
    config_scheme: Tuple[Tuple[str, MkType], ...] = ()

    def __init__(self, global_config):
        """Store the MkDocs config and start with empty accumulators.

        Args:
            global_config: The MkDocs configuration the plugin is plugged into.
        """
        self.global_config = global_config
        # Blocks of the page currently being rendered; this list is shared with
        # the Markdown extension created in ``on_config``.
        self.page_code_blocks = []
        # ``(page.url, joined code)`` pairs, one per page with collected blocks.
        self.docs_code_blocks = []

    def on_config(self, config: MkDocsConfig):
        """Install the collecting extension and drop the stock fence handlers.

        ``pymdownx.highlight`` and ``fenced_code`` are removed from the list of
        Markdown extensions when present; a configuration that does not list
        them is left as is instead of failing with ``ValueError``.
        """
        self.ext = PyCodeExtension(self.page_code_blocks)
        extensions = config["markdown_extensions"]
        # After pymdownx.highlight, because of weird registering deleting the first
        # extension
        extensions.append(self.ext)
        for name in ("pymdownx.highlight", "fenced_code"):
            if name in extensions:
                extensions.remove(name)

    def on_pre_build(self, *, config: MkDocsConfig):
        """Request the ``python`` handler from the mkdocstrings plugin.

        NOTE(review): the returned handler is discarded; presumably the call is
        made so that the handler is loaded before pages are rendered — confirm.
        """
        mkdocstrings_plugin: MkdocstringsPlugin = config.plugins["mkdocstrings"]
        mkdocstrings_plugin.get_handler("python")

    def on_page_content(self, html, page, config, files):
        """Move the blocks collected for ``page`` into ``docs_code_blocks``.

        Returns:
            ``html`` unchanged.
        """
        if self.page_code_blocks:
            self.docs_code_blocks.append((page.url, "\n".join(self.page_code_blocks)))
        # Clear in place: the Markdown extension holds a reference to this list.
        self.page_code_blocks.clear()
        return html
130
131
132
def extract_docs_code():
    """Build the docs into a throwaway directory and return their Python code.

    The MkDocs configuration is loaded with ``load_config()``, a
    ``PyCodeExtractorPlugin`` is plugged into it, and the site is built into a
    temporary directory that is removed afterwards.

    Returns:
        A dict mapping page url to the page's joined code blocks. Pages whose
        url contains ``"changelog"`` are skipped, and when several pages hold
        exactly the same code only one of them (the last one seen) is kept.
    """
    config = load_config()

    build_dir = tempfile.mkdtemp()
    try:
        config["site_dir"] = build_dir

        # plug the pycode extractor plugin
        plugin = PyCodeExtractorPlugin(config)
        config.plugins["pycode_extractor"] = plugin

        config["plugins"].run_event("startup", command="build", dirty=False)
        try:
            build(config)
        finally:
            config["plugins"].run_event("shutdown")

    finally:
        shutil.rmtree(build_dir, ignore_errors=True)

    # First pass: index by code so identical snippets collapse to a single url.
    url_by_code = {}
    for url, code in plugin.docs_code_blocks:
        if "changelog" in url:
            continue
        url_by_code[code] = url

    # Second pass: flip back to url -> code, which deduplicates the urls too.
    code_by_url = {}
    for code, url in url_by_code.items():
        code_by_url[url] = code
    return code_by_url