-
Notifications
You must be signed in to change notification settings - Fork 2
/
festival_plan-parser.py
53 lines (46 loc) · 1.9 KB
/
festival_plan-parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Author: Himura Kazuto <[email protected]>
import os
import re
import sys
import csv
from xml.etree import ElementTree
# Input file: the first command-line argument, or festival_plan.xls on the
# current user's Desktop when no argument is given.
if len(sys.argv) < 2:
    # Joining the path parts keeps the default usable on every platform; the
    # backslash spelling "~\Desktop\..." is only expanded by the Windows
    # implementation of expanduser.
    festival_plan_xls = os.path.join(os.path.expanduser('~'), 'Desktop', 'festival_plan.xls')
else:
    festival_plan_xls = sys.argv[1]
# Output files are written into the directory that holds the input file.
out_dir = os.path.split(festival_plan_xls)[0]

# 'utf-8-sig' drops a leading byte-order mark if there is one; the context
# manager guarantees the handle is closed once the contents are read.
with open(festival_plan_xls, encoding='utf-8-sig') as xls_file:
    xls_contents = xls_file.read()

# Keep only what follows the first <body> tag.
xls_contents = xls_contents.split('<body>', 1)[1]
# Markup bug :( -- a bare "&" is not well-formed XML, so escape every ampersand
# that does not already start an entity or character reference.
xls_contents = re.sub(r'&(?!(?:#\d+|#x[0-9A-Fa-f]+|[A-Za-z][A-Za-z0-9]*);)', '&amp;', xls_contents)
# Markup bug :( -- remove the stray extra quote in tags that end with `"">`.
xls_contents = re.sub(r'(<.+?=\")(.*?)\"(\">)', r'\1\2\3', xls_contents)
# Debugging aid for new markup bugs: uncomment to dump the sanitized markup.
# with open(os.path.join(out_dir, 'xls_contents.html'), 'w', encoding='utf-8') as dump:
#     dump.write(xls_contents)
table = ElementTree.XML(xls_contents)
# Flatten every row of the parsed table into a (tag, time, value) triple.
# Each row is unpacked into exactly two cells: the time cell and the data cell.
plan = []
for time_cell, data_cell in table:
    if time_cell.text:
        # Plain row: both cells carry their text directly, so there is no tag.
        entry = (None, time_cell.text, data_cell.text)
    else:
        # The time cell has no direct text: the content is wrapped in the
        # first child element of each cell, and that wrapper's tag is recorded.
        wrapper = data_cell[0]
        entry = (wrapper.tag, time_cell[0].text, wrapper.text)
    plan.append(entry)
# Build two views of the plan:
#   human_plan     -- tab-separated text, one line per row;
#   technical_plan -- 5-tuples (info, time, code, num, title) for the CSV file.
human_lines = []  # collected pieces, joined once at the end instead of repeated +=
technical_plan = []
for tag, time, val in plan[2:]:  # skip the first two rows (Day and Place)
    if tag == 'b':  # additional info
        human_lines.append(f'\n{time}\t{val}\n')
        technical_plan.append((val, time, '', '', ''))
    elif tag is None:  # numbered entry, expected as "<code> <num>, <title>"
        head, comma, title = (val or '').partition(', ')
        code, space, num = head.partition(' ')
        if not (comma and space):
            # Report the offending row instead of failing with a bare
            # unpacking or attribute error.
            raise ValueError(f"Malformed entry at {time}: {val!r} (expected '<code> <num>, <title>')")
        # The human-readable line turns the first comma into a period.
        human_lines.append(f"{time}\t\t{val.replace(',', '.', 1)}\n")
        technical_plan.append(('', time, code, num, title))
    else:
        # ValueError is still caught by any `except Exception` handler.
        raise ValueError(f"Unexpected tag in cell: <{tag}>")
human_plan = ''.join(human_lines)
# Write the human-readable plan next to the input file; the context manager
# flushes and closes the file even if the write fails part-way.
with open(os.path.join(out_dir, 'human_plan.txt'), 'w', encoding='utf-8') as f:
    f.write(human_plan)

# Write the technical plan as CSV: a header row followed by one row per entry.
# newline='' lets the csv module control line endings itself.
with open(os.path.join(out_dir, 'technical_plan.csv'), 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['info', 'time', 'code', 'num', 'voting_title'])
    writer.writerows(technical_plan)