-
Notifications
You must be signed in to change notification settings - Fork 4
/
parse-html.html
110 lines (88 loc) · 2.19 KB
/
parse-html.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
<head>
<meta charset="utf-8">
<style>
/* Page layout: a fixed-width, horizontally centred grid with two equal columns. */
body {
width: 1400px;
margin: 0 auto;
display: grid;
grid-template-columns: repeat(2, 1fr);
grid-gap: 20px;
}
/* The heading stretches from grid line 1 to grid line 3, i.e. across both columns. */
h1 {
margin: 0;
grid-column: 1 / 3;
text-align: center;
}
/* Both text boxes fill their column; white-space: pre preserves whitespace and prevents line wrapping. */
textarea {
width: 100%;
height: 700px;
display: block;
white-space: pre;
font-size: 12px;
}
</style>
</head>
<body>
<h1>HTML parser</h1>
<div>
<!-- Source side: HTML typed or pasted here is read by the #parse click handler. -->
<textarea id="input"></textarea>
<button id="parse">Parse</button>
</div><!--
--><div>
<!-- Result side: the #parse click handler writes tab-indented JSON here; #copy puts it on the clipboard. -->
<textarea id="output"></textarea>
<button id="copy">Copy</button>
</div>
</body>
<script>
// Element tag names (upper-case, as compared against Element.tagName) that
// are kept as nodes in the output tree. Any other child element is dropped
// together with its whole subtree.
const TAGS = [
  'H1',
  'H2',
  'P',
  'UL',
  'LI',
  'DIV',
]
// Subset of TAGS whose inner markup is flattened into a `text` string.
const TEXT_TAGS = [
  'H1',
  'H2',
  'P',
]

/**
 * Builds a global regex that matches the opening or closing form of exactly
 * one tag, with or without attributes.
 *
 * The name must be followed by whitespace or `>`, so `a` does not match
 * `<abbr>` and `b` does not match `<br>`.
 *
 * @param {string} name - Lower-case tag name (innerHTML serialises HTML tag names in lower case).
 * @returns {RegExp} Global pattern for `<name ...>` and `</name>`.
 */
function tagPattern(name) {
  return new RegExp(`<\\/?${name}(?:\\s[^>]*)?>`, 'g')
}

// Ordered [pattern, replacement] steps applied to the serialised markup of a
// text element. Whitespace is normalised first, then typographic quotes, then
// inline tags are turned into lightweight markers or removed.
const TEXT_REPLACEMENTS = [
  [/^\n/, ''],
  [/\t/g, ''],
  [/\n/g, ' '],
  [/[‘’]/g, '\''],
  [/[“”]/g, '"'],
  [tagPattern('i'), '_'],
  [tagPattern('b'), '**'],
  [tagPattern('sup'), '^'],
  [tagPattern('strike'), '~~'],
  [tagPattern('br'), ''],
  [tagPattern('a'), ''],
  [tagPattern('font'), ''],
  [tagPattern('span'), ''],
]

// The character references that innerHTML serialisation emits inside text.
const ENTITY_CHARS = {
  amp: '&',
  lt: '<',
  gt: '>',
  nbsp: '\u00A0',
}

/**
 * Converts the serialised inner markup of a text element into plain text
 * with lightweight inline markers.
 *
 * @param {string} markup - Value of Element.innerHTML.
 * @returns {string} Flattened text.
 */
function flattenMarkup(markup) {
  let text = markup
  for (const [pattern, replacement] of TEXT_REPLACEMENTS) {
    text = text.replace(pattern, replacement)
  }
  // Decode last and in a single pass, so that literal source text such as
  // "&amp;lt;" ends up as "&lt;" and is not decoded a second time into "<".
  return text.replace(/&(amp|lt|gt|nbsp);/g, (match, name) => ENTITY_CHARS[name])
}

/**
 * Recursively converts an element into a plain, JSON-serialisable tree.
 *
 * @param {Element} html - Element to convert. Only `tagName`, `innerHTML`
 *   and `children` are read.
 * @returns {{tag: string, children: Array<Object>, text?: string}} Node with
 *   the lower-cased tag name, the converted child elements whose tag is
 *   listed in TAGS, and, for tags listed in TEXT_TAGS, the flattened text.
 */
function parse(html) {
  const data = {
    tag: html.tagName.toLowerCase(),
    children: [],
  }
  // Serialise and clean the markup only for elements that actually carry
  // text; containers such as DIV/UL never use the result.
  if (TEXT_TAGS.includes(html.tagName)) {
    data.text = flattenMarkup(html.innerHTML)
  }
  for (const child of html.children || []) {
    if (TAGS.includes(child.tagName)) {
      data.children.push(parse(child))
    }
  }
  return data
}
// Wire up the "Parse" button: read the markup typed into #input, parse it as
// an HTML document, convert the document's second top-level element (the
// <body> that the HTML parser always creates) with parse(), and show the
// result in #output as tab-indented JSON.
const button = document.getElementById('parse')
button.addEventListener('click', function handleParseClick() {
  const source = document.getElementById('input').value
  const parsedDocument = new window.DOMParser().parseFromString(source, 'text/html')
  const bodyElement = parsedDocument.documentElement.children[1]
  const tree = parse(bodyElement)
  document.getElementById('output').value = JSON.stringify(tree, null, "\t")
})
// Wire up the "Copy" button: put the current contents of #output on the
// system clipboard.
let copyButton = document.getElementById('copy')
copyButton.addEventListener('click', () => {
  const outputBox = document.getElementById('output')
  // The async Clipboard API is only exposed in secure contexts; report that
  // explicitly instead of failing with a bare TypeError.
  const clipboard = navigator.clipboard
  if (!clipboard) {
    console.error('Copy failed: the Clipboard API is not available in this context')
    return
  }
  // writeText() returns a promise that rejects when the write is not
  // permitted; handle the rejection rather than leaving it floating.
  clipboard.writeText(outputBox.value).catch((err) => {
    console.error('Copy failed: could not write to the clipboard', err)
  })
})
</script>