-
Notifications
You must be signed in to change notification settings - Fork 0
/
XMLToHTML.py
31 lines (26 loc) · 1.06 KB
/
XMLToHTML.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import toHTML
import re
# Captures the numeric AUNITVALUE of every attribute pair whose AUNIT value
# starts with "PERIOD". XMLToHTML uses the first two captures as the
# 'start' and 'end' dates of the document.
datePattern = re.compile(r'AUNIT="PERIOD[^"]+" AUNITVALUE="([0-9]+)"')
# Matches a leading XML declaration (optionally preceded by a BOM and/or
# whitespace) together with the single line break that follows it.
_xmlDeclPattern = re.compile(r'\A\ufeff?\s*<\?xml\b[^>]*\?>[ \t]*\r?\n?')


def XMLToHTML(XMLFileName, reqEncoding):
    """Convert an XML file under ``../datas/kic/`` to HTML and pick out its dates.

    Args:
        XMLFileName: File name, relative to ``../datas/kic/``.
        reqEncoding: Text encoding used to decode the file.

    Returns:
        A dict built from the result of ``toHTML.convert`` on the cleaned XML
        text, extended with ``'start'`` and ``'end'``: the first and second
        values captured by ``datePattern``.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid ``reqEncoding`` text.
        IndexError: If fewer than two period dates are found in the file.
    """
    XMLFilePath = "../datas/kic/" + XMLFileName
    with open(XMLFilePath, 'r', encoding=reqEncoding) as f:
        xmlText = f.read()

    # Drop the <?xml ...?> declaration only when it is actually present.
    # Blindly discarding the first line would lose real content in files
    # without a declaration and fail on single-line files.
    xmlText = _xmlDeclPattern.sub('', xmlText, count=1)

    # '&cr;' is not a valid XML entity; turn it into an HTML line break.
    xmlText = xmlText.replace('&cr;', '<br />')

    dates = datePattern.findall(xmlText)
    if len(dates) < 2:
        # IndexError (not ValueError) is kept on purpose: it is what the
        # original indexing raised, so existing callers keep working.
        raise IndexError(
            "expected at least two PERIOD dates in {!r}, found {}".format(
                XMLFilePath, len(dates)))

    # The date keys are applied last, so they win over same-named keys
    # coming from the converter.
    return dict(toHTML.convert(xmlText), start=dates[0], end=dates[1])
if __name__ == "__main__":
    # Command-line entry point: convert one XML file (given by name) and
    # write the resulting HTML next to the project root as ../<name>.html.
    import sys

    # Check the argument count BEFORE reading sys.argv[1]; otherwise a
    # missing argument raises IndexError and the message is never shown.
    if len(sys.argv) != 2:
        print("Insufficient arguments", file=sys.stderr)
        sys.exit(1)  # non-zero status so callers can detect the usage error
    argFileName = sys.argv[1]

    # Try EUC-KR first and fall back to UTF-8 on any ordinary failure.
    # `except Exception` keeps that best-effort retry but no longer swallows
    # KeyboardInterrupt / SystemExit the way a bare `except:` does.
    try:
        resDict = XMLToHTML(argFileName, 'euc-kr')
    except Exception:
        resDict = XMLToHTML(argFileName, 'utf-8')

    # Open the output only after conversion succeeded, so a failed run does
    # not leave behind an empty file or truncate an existing one.
    # NOTE(review): the output uses the platform default encoding, as
    # before; confirm whether an explicit encoding is wanted.
    with open("../" + argFileName.replace('.xml', '.html'), 'w') as f:
        f.write(resDict['html'])