This repository has been archived by the owner on Mar 28, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
extract_genki_data.py
72 lines (59 loc) · 2.22 KB
/
extract_genki_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import json
from typing import Dict, List
import urllib.request
import jaconv
from ruamel.yaml import YAML
import os
yaml=YAML()
def request(action, **params):
    """Build the payload dict for a single action call.

    The keyword arguments are collected into the ``params`` entry and the
    ``version`` entry is always 6.
    """
    # Key order matters once serialized; it relies on dicts keeping
    # insertion order, which is guaranteed from Python 3.7 on.
    payload = dict(action=action, params=params, version=6)
    return payload
def invoke(action, **params):
    """Send one action to the local HTTP service and return its result.

    The payload built by ``request`` is JSON-encoded (UTF-8) and sent as the
    request body to ``http://localhost:8765``. The JSON reply must contain
    exactly the two fields ``error`` and ``result``.

    Args:
        action: Name of the action to execute.
        **params: Parameters forwarded with the action.

    Returns:
        The value of the reply's ``result`` field.

    Raises:
        Exception: If the reply has an unexpected number of fields, lacks
            ``error`` or ``result``, or carries a non-null ``error``.
            Errors raised by ``urllib`` or ``json`` propagate unchanged.
    """
    request_json = json.dumps(request(action, **params)).encode('utf-8')
    http_request = urllib.request.Request('http://localhost:8765', request_json)
    # Use the response as a context manager so the connection is closed
    # deterministically instead of being left to the garbage collector.
    with urllib.request.urlopen(http_request) as http_response:
        response = json.load(http_response)

    if len(response) != 2:
        raise Exception('response has an unexpected number of fields')
    if 'error' not in response:
        raise Exception('response is missing required error field')
    if 'result' not in response:
        raise Exception('response is missing required result field')
    if response['error'] is not None:
        raise Exception(response['error'])
    return response['result']
def extract_readings(reading) -> List[Dict[str, object]]:
    """Split a raw reading field into cleaned hiragana readings.

    The field is split on ``;``. For every segment all ASCII characters are
    dropped and the remainder is passed through ``jaconv.kata2hira``.
    Segments that contain no non-ASCII characters at all (an empty field, a
    trailing ``;``, markup only) are skipped, so they do not produce an
    empty reading entry.

    Args:
        reading: Raw field value, possibly containing several readings
            separated by ``;``.

    Returns:
        A list of dicts with the keys ``reading`` (str) and ``important``
        (bool). ``important`` is True when the segment contains the string
        ``#aa0000`` (presumably a highlight colour in the card markup;
        verify against the deck).
    """
    readings = []
    for segment in reading.split(';'):
        clean_reading = "".join(c for c in segment if not c.isascii())
        if not clean_reading:
            # Nothing left after stripping ASCII: there is no reading here.
            continue
        readings.append({
            'reading': jaconv.kata2hira(clean_reading),
            'important': '#aa0000' in segment,
        })
    return readings
if __name__ == "__main__":
    # Export the kanji notes of lessons 3 through 12, one YAML file per lesson.
    folder_path = 'data/genki'
    # The output folder is the same for every lesson, so create it once.
    os.makedirs(folder_path, exist_ok=True)

    for i in range(3, 13):
        card_ids = invoke('findNotes', query=f'deck:Genki_I::L{i}_Kanji')
        cards = invoke('notesInfo', notes=card_ids)

        kanjis = []
        for c in cards:
            fields = c['fields']
            kanjis.append({
                'kanji': fields['Kanji']['value'],
                # 'Bedeutung' is German for 'meaning'.
                'meaning': fields['Bedeutung']['value'],
                'onyomi': extract_readings(fields['Onyomi']['value']),
                'kunyomi': extract_readings(fields['Kunyomi']['value']),
            })

        # Write UTF-8 explicitly: the data contains Japanese text and the
        # platform default encoding may not be able to represent it.
        with open(f'{folder_path}/kanjis_{i:02d}.yaml', 'w', encoding='utf-8') as o:
            yaml.dump(kanjis, o)