"""
Creates the overview markdown files.
"""
import bibtexparser
import os
from typing import TextIO
from dev.markdown import entry_to_markdown
import dev.names as names
def make_overview(document_structure: dict, grouped_entries: dict,
                  title: str, outfile: str):
    """
    Creates an overview file formatted as markdown.

    The `document_structure` dictates the structure of the document.
    It is a dictionary of categories mapping to their respective subcategory
    structures.
    A given category can be a tuple. In this case, all entries corresponding
    to any of the listed groups will be included in the section. The first
    entry in the tuple is used for naming the section.
    If the subcategory structure is empty, None, or False,
    the section will not further divide the category's entries into
    subsections.

    The `grouped_entries` is a dictionary where the keys are the categories
    mapping onto the entries that belong to each category.

    The `title` is the title of the document. It is printed on the first
    line as the main heading.

    The `outfile` is the file to which the output is written.
    """
    with open(outfile, 'w') as out:
        out.write(f'# {title}\n\n')
        out.write('<!-- This file is automatically generated. Do not edit. -->\n\n')
        _make_toc(document_structure, 0, out)
        out.write('\n')
        _write_section(document_structure, grouped_entries, 2, out)
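# A minimal, illustrative sketch (not part of the original data) of the shapes
# `make_overview` expects. The category names and entry fields below are
# hypothetical and only demonstrate the structure:
#
#   example_structure = {
#       'example-topic': {
#           ('example-sub', 'example-sub-alias'): {},
#       },
#   }
#   example_entries = {
#       'example-sub': [{'ID': 'doe2020', 'year': '2020'}],
#   }
#   make_overview(example_structure, example_entries,
#                 'Example overview', 'overview/example.md')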
def _make_toc(doc_structure: dict, nesting_depth: int, out: TextIO):
    nesting_prefix = ' ' * nesting_depth
    for section, sub_sections in doc_structure.items():
        if not isinstance(section, tuple):
            section = (section,)
        section_name = names.translation.get(section[0], section[0])
        toc_link = _github_toc_link(section_name)
        out.write(f'{nesting_prefix}* [{section_name}](#{toc_link})\n')
        if sub_sections:
            _make_toc(sub_sections, nesting_depth + 1, out)
    ...
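# Illustrative only: for a structure such as {'sat': {'sat-solving': []}},
# _make_toc writes a nested bullet list along the lines of
#   * [SAT](#sat)
#    * [SAT Solving](#sat-solving)
# assuming (hypothetically) that names.translation maps 'sat' to 'SAT' and
# 'sat-solving' to 'SAT Solving'.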
def _github_toc_link(section_name: str) -> str:
    """
    Turns the string to lower case, replaces spaces with dashes,
    and removes all remaining characters that are not alphanumeric or a dash.
    """
    section_name = section_name.lower().replace(' ', '-')
    section_name = ''.join(c for c in section_name if c.isalnum() or c == '-')
    return section_name
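# For example (illustrative only): _github_toc_link('SAT Solving') yields
# 'sat-solving', and _github_toc_link('FAQ & Misc.') yields 'faq--misc'.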
def _write_section(section_structure: dict, grouped_entries: dict,
                   heading_level: int, out: TextIO):
    heading_prefix = '#' * heading_level
    for subsection, sub_structure in section_structure.items():
        # A section can be a tuple or a string; we unify this to all tuples.
        if not isinstance(subsection, tuple):
            subsection = (subsection,)
        # The first entry in the tuple is the name of the section.
        section_name = names.translation.get(subsection[0], subsection[0])
        out.write(f'{heading_prefix} {section_name}\n\n')
        if sub_structure:
            _write_section(sub_structure, grouped_entries,
                           heading_level + 1, out)
        else:
            entries = []
            for category in subsection:
                entries += grouped_entries.get(category, [])
            # Sort by year (descending) and then by ID (ascending).
            # Negative year forces descending order for first key.
            entries.sort(key=lambda e: (-int(e['year']), e['ID']))
            for entry in entries:
                out.write('* ' + entry_to_markdown(entry))
                out.write('\n')
            out.write('\n')
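# Illustrative only: a leaf section at the top level of the structure, say
# ('sat-solving',), with two matching entries would roughly render as
#   ## SAT Solving
#
#   * <2023 entry rendered by entry_to_markdown>
#   * <2019 entry rendered by entry_to_markdown>
# with newer publication years listed first (the heading text assumes a
# hypothetical names.translation mapping).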
if __name__ == '__main__':
    with open('database.bib') as f:
        bib = bibtexparser.load(f)

    # Group the entries by the groups they are tagged with.
    group_entries = {}
    for entry in bib.entries:
        groups = entry.get('groups', '').split(',')
        for group in groups:
            group = group.strip()
            group_entries.setdefault(group, []).append(entry)
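    # Illustrative only: an entry whose BibTeX 'groups' field reads, e.g.,
    # 'sat-solving, dataset-sat' is added to both the 'sat-solving' and the
    # 'dataset-sat' buckets (the exact group names in database.bib are
    # assumptions here).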
    # Group the entries by year.
    year_entries = {}
    for entry in bib.entries:
        year = entry['year']
        year_entries.setdefault(year, []).append(entry)
    ai_groups = {
        ('trees', 'ai-mul-trees'): {},
        ('randomforest', 'ai-mul-randomforest'): {},
        ('svm', 'ai-mul-svm'): {},
        ('knn', 'ai-mul-knn'): {},
        'ai-mul-lr': {},
        ('neuralnetworks', 'ai-mul-neuralnetworks'): {},
        'reinforcement-learning': {},
        ('genetic', 'ai-mul-genetic'): {},
        ('nlp', 'ai-mul-llm'): {},
        'automatonlearning': {},
        'baysianinference': {},
        ('clustering', 'ai-mul-clustering'): {},
        ('datamining', 'ai-mul-datamining'): {},
        'ai-mul-naive': {},
        # 'ai-multiple': {},
        'ai-custom': {},
        'ai-other': {},
    }
    fm_groups = {
        'sat': {
            'sat-prediction': [],
            ('sat-solving', 'sat-multi-solving'): [],
            ('sat-portfolio', 'sat-multi-algorithmselection'): [],
            ('sat-maxsat', 'sat-multi-maxsat'): [],
            'sat-varselection': [],
            'sat-branching': [],
            'sat-generation': [],
            'sat-parameter': [],
            'sat-multi-modelcounting': [],
            'sat-dependency': [],
            'sat-meta': [],
        },
        'smt': {
            'smt-solverselection': [],
            'smt-quantifier': [],
            'smt-quality': [],
        },
        'tp': {
            'tp-portfolio': [],
            ('tp-tacticsprediction', 'tp-mul-tacticsprediction'): [],
            'tp-formulaclassification': [],
            ('tp-axiomselection', 'tp-mul-axiomselection'): [],
            ('tp-proofsearch', 'tp-mul-proofsearch'): [],
            'tp-proofmining': [],
            'tp-mul-proofrewrite': [],
            ('tp-proofsynthesis', 'tp-mul-synthesis'): [],
            'tp-formulasynthesis': [],
            'tp-symbolic': [],
            'tp-mul-symbolguessing': [],
            'tp-heuristicselection': [],
            'tp-mul-positionprediction': [],
            'tp-lemmaname': [],
        },
        'modelchecking': [],
        'synthesis': {
            'synthesis-invariant': [],
            'synthesis-loopinvariant': [],
            'synthesis-repair': [],
            'synthesis-specification': [],
            'synthesis-annotations': [],
        },
        'other': [],
    }
    os.makedirs('overview', exist_ok=True)

    make_overview(ai_groups, group_entries,
                  'Overview of used AI techniques from 2019-2023',
                  'overview/ai-techniques_2019-2023.md')
    make_overview(fm_groups, group_entries,
                  'Overview of used FM techniques from 2019-2023',
                  'overview/fm-techniques_2019-2023.md')

    # List the years from newest to oldest, keeping only those that actually
    # occur in the database.
    year_overview = {y_str: [] for y in range(2023, 1971, -1)
                     if (y_str := str(y)) in year_entries}
    make_overview(year_overview, year_entries,
                  'Overview of found primary studies by year',
                  'overview/all_by_year.md')

    data_sets = {
        'dataset-sat': [],
        'dataset-tp': [],
        'dataset-modelchecking': [],
        'dataset-synthesis': [],
        'dataset-smt': [],
    }
    make_overview(data_sets, group_entries,
                  'Overview of found data sets for ML applied to FM '
                  'from 2019-2023',
                  'overview/data-sets_2019-2023.md')