[7f9fb8]: / tools / dev / ensure_headers.py

Download this file

197 lines (164 with data), 6.8 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
"""Ensure license and copyright statements are in source files.

From https://www.bestpractices.dev/en/projects/7783?criteria_level=2:

    The project MUST include a copyright statement in each source file, identifying the
    copyright holder (e.g., the [project name] contributors). [copyright_per_file]
    This MAY be done by including the following inside a comment near the beginning of
    each file: "Copyright the [project name] contributors.".

And:

    The project MUST include a license statement in each source file.

This script ensures that we use consistent license naming in consistent locations
toward the top of each file.
"""
# Authors: The MNE-Python contributors.
# License: BSD-3-Clause
# Copyright the MNE-Python contributors.
import re
from pathlib import Path
import numpy as np
from git import Repo
# GitPython handle on the repository; this script lives in tools/dev/, so the
# repository root is two directories above the script's own directory.
repo = Repo(Path(__file__).parents[2])
# Canonical header lines that every processed file is normalized to.
AUTHOR_LINE = "# Authors: The MNE-Python contributors."
LICENSE_LINE = "# License: BSD-3-Clause"
COPYRIGHT_LINE = "# Copyright the MNE-Python contributors."
# Cover how lines can start (regex or tuple to be used with startswith)
# AUTHOR_RE accepts "# Author:", "# Authors:", "# @author:" and "# @authors:", with an
# optional space before the colon.
AUTHOR_RE = re.compile(r"^# (A|@a)uthors? ?: .*$")
LICENSE_STARTS = ("# License: ", "# SPDX-License-Identifier: ")
COPYRIGHT_STARTS = ("# Copyright ",)
def get_paths_from_tree(root, level=0):
    """Yield the path of every non-tree entry below a GitPython tree.

    Parameters
    ----------
    root : iterable
        Tree-like object whose entries expose ``type`` and ``path`` attributes.
        Entries with ``type == "tree"`` are descended into.
    level : int
        Nesting depth of ``root``. It does not influence the result and is kept
        only so existing callers keep working.

    Yields
    ------
    path : pathlib.Path
        ``Path(entry.path)`` for each non-tree entry, in depth-first order.
    """
    # Explicit stack of partially consumed iterators instead of recursion; the
    # innermost tree being walked is always on top.
    pending = [iter(root)]
    while pending:
        for entry in pending[-1]:
            if entry.type == "tree":
                # Descend now; the parent iterator resumes once the child is done.
                pending.append(iter(entry))
                break
            yield Path(entry.path)
        else:
            # Current tree exhausted, go back to its parent.
            pending.pop()
def first_commentable_line(lines):
    """Return the index of the first line where a header comment may be placed.

    Parameters
    ----------
    lines : list of str
        Lines of the file; must contain at least one element.

    Returns
    -------
    idx : int
        The index just past a leading ``\"\"\"`` docstring if there is one, ``1``
        if the file starts with a shebang, and ``0`` otherwise.

    Raises
    ------
    RuntimeError
        If the file opens a docstring whose closing quotes are not found within
        the first 100 lines.
    """
    max_len = 100
    head = lines[0]
    if not head.startswith(('"""', 'r"""')):
        # No docstring: only a shebang has to remain on the very first line.
        return 1 if head.startswith("#!") else 0
    if head.count('"""') == 2:
        # Docstring opens and closes on the first line.
        return 1
    stop = min(max_len, len(lines))
    closing = next((idx for idx in range(1, stop) if '"""' in lines[idx]), None)
    if closing is None:
        raise RuntimeError(
            f"Failed to find end of file docstring within {max_len} lines"
        )
    return closing + 1
def path_multi_author(path):
    """Tell whether the file at ``path`` may list several authors.

    Parameters
    ----------
    path : pathlib.Path
        Path whose first component is inspected.

    Returns
    -------
    allowed : bool
        True when the first path component is ``examples`` or ``tutorials``.
    """
    top_level = path.parts[0]
    return top_level == "examples" or top_level == "tutorials"
def get_author_idx(path, lines):
    """Locate the author line in ``lines``.

    Parameters
    ----------
    path : pathlib.Path
        Path of the file, used only in the assertion message.
    lines : list of str
        Lines of the file.

    Returns
    -------
    idx : numpy integer | None
        Index of the line matching ``AUTHOR_RE``, or None when no line matches.
        More than one matching line trips an assertion.
    """
    is_author = [AUTHOR_RE.match(line) is not None for line in lines]
    (author_idx,) = np.where(is_author)
    assert len(author_idx) <= 1, f"{len(author_idx)=} for {path=}"
    if len(author_idx) == 0:
        return None
    return author_idx[0]
def get_license_idx(path, lines):
    """Locate the license line in ``lines``.

    Parameters
    ----------
    path : pathlib.Path
        Path of the file, used only in the assertion message.
    lines : list of str
        Lines of the file.

    Returns
    -------
    idx : numpy integer | None
        Index of the line starting with one of ``LICENSE_STARTS``, or None when
        there is no such line. More than one such line trips an assertion.
    """
    is_license = [line.startswith(LICENSE_STARTS) for line in lines]
    (license_idx,) = np.where(is_license)
    assert len(license_idx) <= 1, f"{len(license_idx)=} for {path=}"
    if len(license_idx) == 0:
        return None
    return license_idx[0]
def _ensure_author(lines, path):
    """Make sure ``lines`` holds the canonical author line in the expected spot.

    Parameters
    ----------
    lines : list of str
        Lines of the file; modified in place.
    path : pathlib.Path
        Path of the file. Decides whether multiple authors are allowed and is
        used in error messages.

    Raises
    ------
    RuntimeError
        If the author line does not end up either at the very top of the file or
        one blank line after the first commentable line (docstring / shebang).

    Notes
    -----
    Files for which ``path_multi_author(path)`` is true and that already have an
    author line are left untouched.
    """
    author_idx = get_author_idx(path, lines)
    license_idx = get_license_idx(path, lines)
    first_idx = first_commentable_line(lines)
    # 1. Keep existing
    if author_idx is not None:
        # We have to be careful here -- examples and tutorials are allowed multiple
        # authors
        if path_multi_author(path):
            # Just assume it's correct and return
            return
        assert license_idx is not None, f"{license_idx=} for {path=}"
        # Drop everything between the author and license lines (e.g. a list of
        # individual authors) so that the license directly follows the author line.
        del lines[author_idx + 1 : license_idx]
        # Report the line that is actually checked: license_idx is stale after the
        # deletion above and could point at an unrelated line or past the end.
        assert lines[author_idx + 1].startswith(LICENSE_STARTS), lines[author_idx + 1]
        del license_idx
        lines[author_idx] = AUTHOR_LINE
    elif license_idx is not None:
        # 2. Before license line if present
        lines.insert(license_idx, AUTHOR_LINE)
    else:
        # 3. First line after docstring
        lines.insert(first_idx, AUTHOR_LINE)
    # Now make sure it's in the right spot
    author_idx = get_author_idx(path, lines)
    if author_idx != 0:
        if author_idx == first_idx:
            # Insert a blank line
            lines.insert(author_idx, "")
            author_idx += 1
        # Anywhere but the top of the file, the author line belongs one line
        # (the blank separator) after the first commentable line.
        first_idx += 1
    if author_idx != first_idx:
        raise RuntimeError(
            "\nLine should have comments as docstring or author line needs to be moved "
            "manually to be one blank line after the docstring:\n"
            f"{path}: {author_idx=} != {first_idx=}"
        )
def _ensure_license(lines, path):
    """Make sure ``lines`` holds exactly one canonical license line.

    An existing license line is replaced in place. Otherwise the license is
    inserted after the author line; for files that may list several authors it
    is inserted after the run of ``"# "`` comment lines following the author line.

    Parameters
    ----------
    lines : list of str
        Lines of the file; modified in place. Must already contain an author
        line when no license line is present.
    path : pathlib.Path
        Path of the file. Decides whether multiple authors are allowed and is
        used in assertion messages.
    """
    # 1. Keep/replace existing
    insert = get_license_idx(path, lines)

    # 2. After author line(s)
    if insert is None:
        author_idx = get_author_idx(path, lines)
        assert author_idx is not None, f"{author_idx=} for {path=}"
        insert = author_idx + 1
        # The helper must be *called*: the bare function object is always truthy,
        # which made every file take the multi-author path.
        if path_multi_author(path):
            # Figure out where to insert the license:
            for insert, line in enumerate(lines[author_idx + 1 :], insert):
                if not line.startswith("# "):
                    break
            else:
                # Every remaining line is an author continuation: append at the end.
                insert = len(lines)
    if insert < len(lines) and lines[insert].startswith(LICENSE_STARTS):
        lines[insert] = LICENSE_LINE
    else:
        lines.insert(insert, LICENSE_LINE)
    assert lines.count(LICENSE_LINE) == 1, f"{lines.count(LICENSE_LINE)=} for {path=}"
def _ensure_copyright(lines, path):
    """Make sure the canonical copyright line directly follows the license line.

    Parameters
    ----------
    lines : list of str
        Lines of the file; modified in place. Must already contain
        ``LICENSE_LINE`` (``list.index`` raises ``ValueError`` otherwise).
    path : pathlib.Path
        Path of the file relative to the repository root, used to look up how
        many copyright lines the file may contain and in assertion messages.
    """
    # Files that may carry more than one line starting with "# Copyright ".
    n_expected = {
        "mne/preprocessing/_csd.py": 2,
        "mne/transforms.py": 2,
    }
    n_copyright = sum(line.startswith(COPYRIGHT_STARTS) for line in lines)
    # The keys above use forward slashes, so compare against the POSIX form of the
    # path; str(path) would use backslashes on Windows and never match.
    assert n_copyright <= n_expected.get(Path(path).as_posix(), 1), n_copyright
    insert = lines.index(LICENSE_LINE) + 1
    # The bounds check covers a license line that is the last line of the file.
    if insert < len(lines) and lines[insert].startswith(COPYRIGHT_STARTS):
        lines[insert] = COPYRIGHT_LINE
    else:
        lines.insert(insert, COPYRIGHT_LINE)
    assert lines.count(COPYRIGHT_LINE) == 1, (
        f"{lines.count(COPYRIGHT_LINE)=} for {path=}"
    )
def _ensure_blank(lines, path):
    """Make sure the copyright line is followed by a blank line.

    Parameters
    ----------
    lines : list of str
        Lines of the file; modified in place. Must contain ``COPYRIGHT_LINE``
        exactly once.
    path : pathlib.Path
        Path of the file, used only in the assertion message.
    """
    assert lines.count(COPYRIGHT_LINE) == 1, (
        f"{lines.count(COPYRIGHT_LINE)=} for {path=}"
    )
    insert = lines.index(COPYRIGHT_LINE) + 1
    # Add the separator when the next line has content, and also when the
    # copyright line is the last line (file without a trailing newline), which
    # would otherwise raise IndexError.
    if insert >= len(lines) or lines[insert].strip():
        lines.insert(insert, "")
# Paths coming out of the git tree are relative to the repository root. Anchor all
# file I/O there so the script does not depend on the current working directory.
_repo_root = Path(__file__).resolve().parents[2]
for path in get_paths_from_tree(repo.tree()):
    if path.suffix != ".py":
        continue
    full_path = _repo_root / path
    lines = full_path.read_text("utf-8").split("\n")
    # Snapshot taken before any modification so we only rewrite changed files.
    orig_lines = list(lines)
    # Remove the UTF-8 file coding stuff
    if lines[0] in ("# -*- coding: utf-8 -*-", "# -*- coding: UTF-8 -*-"):
        lines = lines[1:]
        # ... together with the blank line that followed it, if any.
        if lines[0] == "":
            lines = lines[1:]
    # We had these with mne/commands without an executable bit, and don't really
    # need them executable, so let's get rid of the line.
    if lines[0].startswith("#!/usr/bin/env python") and path.parts[:2] == (
        "mne",
        "commands",
    ):
        lines = lines[1:]
    # The helpers receive the repository-relative path: they inspect its leading
    # components and compare it against repository-relative names.
    _ensure_author(lines, path)
    _ensure_license(lines, path)
    _ensure_copyright(lines, path)
    _ensure_blank(lines, path)
    if lines != orig_lines:
        print(path)
        full_path.write_text("\n".join(lines), "utf-8")