Diff of /docs/stat.py [000000] .. [6d389a]

Switch to unified view

a b/docs/stat.py
1
#!/usr/bin/env python
2
# Copyright (c) OpenMMLab. All rights reserved.
3
import functools as func
4
import glob
5
import re
6
from os.path import basename, splitext
7
8
import numpy as np
9
import titlecase
10
11
12
def anchor(name):
    """Turn a heading into the slug used for its HTML anchor.

    The text is stripped and lower-cased, every character that is not an
    ASCII letter or digit becomes ``-``, runs of ``-`` are collapsed into
    one, and leading/trailing ``-`` are removed.

    Args:
        name (str): Heading text.

    Returns:
        str: The slug; empty when ``name`` has no ASCII letters or digits.
    """
    slug = re.sub(r'[^a-zA-Z0-9]', '-', name.strip().lower())
    return re.sub(r'-+', '-', slug).strip('-')
15
16
17
# Count algorithms

files = sorted(glob.glob('*_models.md'))
# files = sorted(glob.glob('docs/*_models.md'))

# One (papers, configs, ckpts, statsmsg) tuple is collected per model page.
stats = []

for f in files:
    # Read as UTF-8 explicitly so parsing does not depend on the platform's
    # default encoding.
    with open(f, 'r', encoding='utf-8') as content_file:
        content = content_file.read()

    # title: first line of the page with the markdown '#' markers removed
    title = content.split('\n')[0].replace('#', '')

    # skip IMAGE and ABSTRACT tags
    content = '\n'.join(
        x for x in content.split('\n')
        if 'IMAGE' not in x and 'ABSTRACT' not in x)

    # count papers: each `<!-- [TYPE] -->` marker is paired with the first
    # `title = {...}` entry that follows it. Titles are case-normalised so
    # the same title written with different casing is counted once.
    papers = {
        (papertype, titlecase.titlecase(paper.lower().strip()))
        for (papertype, paper) in re.findall(
            r'<!--\s*\[([A-Z]*?)\]\s*-->\s*\n.*?\btitle\s*=\s*{(.*?)}',
            content, re.DOTALL)
    }
    # paper links: the page is searched with its lines in reverse order, so
    # the first `## ` heading found after a title in the reversed text is
    # the nearest `## ` heading above that title in the page itself.
    revcontent = '\n'.join(reversed(content.splitlines()))
    paperlinks = {}
    for _, p in papers:
        print(p)
        # Escape every regex metacharacter in the title. Escaping only
        # '\\' and '?' let titles containing e.g. '(' or '+' either raise
        # re.error or add capture groups to the pattern below.
        q = re.escape(p)
        paperlinks[p] = ' '.join(
            f'[->]({splitext(basename(f))[0]}.html#{anchor(paperlink)})'
            for paperlink in re.findall(
                rf'\btitle\s*=\s*{{\s*{q}\s*}}.*?\n## (.*?)\s*[,;]?\s*\n',
                revcontent, re.DOTALL | re.IGNORECASE))
        print('   ', paperlinks[p])
    paperlist = '\n'.join(
        sorted(f'    - [{t}] {x} ({paperlinks[x]})' for t, x in papers))
    # count configs
    configs = set(x.lower().strip()
                  for x in re.findall(r'https.*configs/.*\.py', content))

    # count ckpts
    ckpts = set(x.lower().strip()
                for x in re.findall(r'https://download.*\.pth', content)
                if 'mmaction' in x)

    statsmsg = f"""
## [{title}]({f})

* Number of checkpoints: {len(ckpts)}
* Number of configs: {len(configs)}
* Number of papers: {len(papers)}
{paperlist}

    """

    stats.append((papers, configs, ckpts, statsmsg))

# Merge the per-page sets. `set().union(*...)` also works when no model page
# was found, where a reduce() without an initial value raises TypeError.
allpapers = set().union(*(p for p, _, _, _ in stats))
allconfigs = set().union(*(c for _, c, _, _ in stats))
allckpts = set().union(*(c for _, _, c, _ in stats))
msglist = '\n'.join(x for _, _, _, x in stats)

# Number of distinct papers per paper type tag.
papertypes, papercounts = np.unique([t for t, _ in allpapers],
                                    return_counts=True)
countstr = '\n'.join(
    [f'   - {t}: {c}' for t, c in zip(papertypes, papercounts)])

modelzoo = f"""
# Overview

* Number of checkpoints: {len(allckpts)}
* Number of configs: {len(allconfigs)}
* Number of papers: {len(allpapers)}
{countstr}

For supported datasets, see [datasets overview](datasets.md).

{msglist}
"""

with open('modelzoo.md', 'w', encoding='utf-8') as f:
    f.write(modelzoo)
104
105
# Count datasets

files = ['supported_datasets.md']
# files = sorted(glob.glob('docs/tasks/*.md'))

# One (papers, configs, ckpts, statsmsg) tuple is collected per dataset page.
# The configs and ckpts slots are always empty here; they are kept so the
# tuples have the same shape as the ones collected for model pages.
datastats = []

for f in files:
    # Read as UTF-8 explicitly so parsing does not depend on the platform's
    # default encoding.
    with open(f, 'r', encoding='utf-8') as content_file:
        content = content_file.read()

    # title: first line of the page with the markdown '#' markers removed
    title = content.split('\n')[0].replace('#', '')

    # count papers: each `<!-- [TYPE] -->` marker is paired with the first
    # `title = {...}` entry that follows it. Titles are case-normalised so
    # the same title written with different casing is counted once.
    papers = {
        (papertype, titlecase.titlecase(paper.lower().strip()))
        for (papertype, paper) in re.findall(
            r'<!--\s*\[([A-Z]*?)\]\s*-->\s*\n.*?\btitle\s*=\s*{(.*?)}',
            content, re.DOTALL)
    }
    # paper links: the page is searched with its lines in reverse order, so
    # the first `## ` heading found after a title in the reversed text is
    # the nearest `## ` heading above that title in the page itself.
    revcontent = '\n'.join(reversed(content.splitlines()))
    paperlinks = {}
    for _, p in papers:
        print(p)
        # Escape every regex metacharacter in the title. Escaping only
        # '\\' and '?' let titles containing e.g. '(' or '+' either raise
        # re.error or add capture groups to the pattern below.
        q = re.escape(p)
        # `section` is the heading text; it gets its own name so it no
        # longer shadows the paper title `p` used as the dictionary key.
        paperlinks[p] = ', '.join(
            f'[{section.strip()} ->]'
            f'({splitext(basename(f))[0]}.html#{anchor(section)})'
            for section in re.findall(
                rf'\btitle\s*=\s*{{\s*{q}\s*}}.*?\n## (.*?)\s*[,;]?\s*\n',
                revcontent, re.DOTALL | re.IGNORECASE))
        print('   ', paperlinks[p])
    paperlist = '\n'.join(
        sorted(f'    - [{t}] {x} ({paperlinks[x]})' for t, x in papers))

    statsmsg = f"""
## [{title}]({f})

* Number of papers: {len(papers)}
{paperlist}

    """

    # Dataset pages have no configs or checkpoints. Store empty sets rather
    # than whatever `configs` / `ckpts` an earlier loop left behind (those
    # names may not even exist when this section runs).
    datastats.append((papers, set(), set(), statsmsg))

# `set().union(*...)` also works for an empty `datastats`, where a reduce()
# without an initial value raises TypeError.
alldatapapers = set().union(*(p for p, _, _, _ in datastats))

# Summarize

datamsglist = '\n'.join(x for _, _, _, x in datastats)
# Number of distinct papers per paper type tag.
papertypes, papercounts = np.unique([t for t, _ in alldatapapers],
                                    return_counts=True)
countstr = '\n'.join(
    [f'   - {t}: {c}' for t, c in zip(papertypes, papercounts)])

modelzoo = f"""
# Overview

* Number of papers: {len(alldatapapers)}
{countstr}

For supported action algorithms, see [modelzoo overview](modelzoo.md).

{datamsglist}
"""

with open('datasets.md', 'w', encoding='utf-8') as f:
    f.write(modelzoo)