-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtidy.c
313 lines (282 loc) · 11.5 KB
/
tidy.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
/* -*- Mode: C; Character-encoding: utf-8; -*- */
/* Copyright (C) 2007-2016 beingmeta, inc.
Copyright (C) 2020-2022 beingmeta, LLC
This file is part of beingmeta's Kno platform and is copyright
and a valuable trade secret of beingmeta, inc.
*/
#ifndef _FILEINFO
#define _FILEINFO __FILE__
#endif
#include "kno/knosource.h"
#include "kno/lisp.h"
#include "kno/eval.h"
#include "kno/cprims.h"
#include "libu8/u8logging.h"
#include "tidy5/tidy.h"
#include "tidy5/tidybuffio.h"
#include "tidy5/tidyenum.h"
/* Module initializer: declared here with KNO_LIBINIT_FN, defined further
   down in this file. */
KNO_EXPORT int kno_init_tidy(void) KNO_LIBINIT_FN;
/* Condition passed to kno_err() by tidy_prim_helper for tidy failures. */
u8_condition kno_TidyError=_("Tidy Error");
/* Zero until kno_init_tidy() has run; afterwards holds the u8_millitime()
   value recorded there.  kno_init_tidy() returns early when it is non-zero. */
static long long int tidy_init = 0;
/* Opt setting */
/* Fetches the option named OPTSTRING from OPTS.
   When OPTS is the false or void value, DFLT is returned with an added
   reference; otherwise OPTSTRING is interned and the lookup is delegated
   to kno_getopt with DFLT as the fallback. */
static lispval getoption(lispval opts,u8_string optstring,lispval dflt)
{
  int have_opts = !((KNO_FALSEP(opts))||(KNO_VOIDP(opts)));
  if (have_opts) {
    lispval optname = kno_intern(optstring);
    return kno_getopt(opts,optname,dflt);}
  return kno_incref(dflt);
}
/* Sets the boolean Tidy option OPTNAME on TDOC from a lisp value:
   `no` when VALUE is the false value, `yes` for anything else.
   Returns the result of tidyOptSetBool unchanged. */
static U8_MAYBE_UNUSED int tidySetBoolOpt(TidyDoc tdoc,
                                          TidyOptionId optname,
                                          lispval value)
{
  return tidyOptSetBool(tdoc,optname,((KNO_FALSEP(value))?(no):(yes)));
}
/* Copies a boolean option from OPTS into TDOC.
   The option named OPTSTRING is fetched with getoption (falling back to
   DFLT); a false value sets OPTNAME to `no`, anything else to `yes`.
   The fetched value is released before returning the result of
   tidyOptSetBool. */
static U8_MAYBE_UNUSED int copyBoolOpt(lispval opts,
                                       TidyDoc tdoc,
                                       TidyOptionId optname,
                                       u8_string optstring,
                                       lispval dflt)
{
  lispval setting = getoption(opts,optstring,dflt);
  int status = tidyOptSetBool(tdoc,optname,
                              ((KNO_FALSEP(setting))?(no):(yes)));
  kno_decref(setting);
  return status;
}
/* Sets the integer Tidy option OPTNAME on TDOC from a lisp value.
   When VALUE does not satisfy KNO_INTP, a kno_TypeError is recorded
   (with an extra reference taken on VALUE for the error) and -1 is
   returned; otherwise the result of tidyOptSetInt is returned. */
static U8_MAYBE_UNUSED int tidySetIntOpt(TidyDoc tdoc,
                                         TidyOptionId optname,
                                         lispval value)
{
  if (!(KNO_INTP(value))) {
    kno_incref(value);
    kno_seterr(kno_TypeError,"tidySetIntOpt","integer",value);
    return -1;}
  return tidyOptSetInt(tdoc,optname,KNO_FIX2INT(value));
}
/* Copies an integer option from OPTS into TDOC.
   The option named OPTSTRING is fetched with getoption:
     - an integer value is passed to tidyOptSetInt;
     - a missing option (void) sets OPTNAME to DFLT;
     - any other value records a kno_TypeError (reported under the
       context "tidySetIntOpt") and yields -1.
   The fetched value is released on every path. */
static U8_MAYBE_UNUSED int copyIntOpt(lispval opts,TidyDoc tdoc,
                                      TidyOptionId optname,
                                      u8_string optstring,
                                      int dflt)
{
  lispval setting = getoption(opts,optstring,KNO_VOID);
  int status;
  if (KNO_INTP(setting))
    status = tidyOptSetInt(tdoc,optname,KNO_FIX2INT(setting));
  else if (KNO_VOIDP(setting))
    status = tidyOptSetInt(tdoc,optname,dflt);
  else {
    /* The error keeps its own reference to the offending value. */
    kno_incref(setting);
    kno_seterr(kno_TypeError,"tidySetIntOpt","integer",setting);
    status = -1;}
  kno_decref(setting);
  return status;
}
/* Sets the string-valued Tidy option OPTNAME on TDOC from a lisp value.
   When VALUE is a lisp string, its C string is handed to tidyOptSetValue
   and that call's result is returned.  Otherwise a kno_TypeError is
   recorded (with an extra reference taken on VALUE for the error) and -1
   is returned.
   The error context now names this function; it previously carried the
   copy-pasted label "tidySetIntOpt". */
static U8_MAYBE_UNUSED int tidySetStringOpt(TidyDoc tdoc,
                                            TidyOptionId optname,
                                            lispval value)
{
  if (KNO_STRINGP(value))
    return tidyOptSetValue(tdoc,optname,KNO_CSTRING(value));
  else {
    kno_incref(value);
    kno_seterr(kno_TypeError,"tidySetStringOpt","string",value);
    return -1;}
}
/* Copies a string option from OPTS into TDOC.
   The option named OPTSTRING is fetched with getoption:
     - a missing option (void) sets OPTNAME to the C string DFLT;
     - a lisp string sets OPTNAME to its C string;
     - any other value records a kno_TypeError and yields -1.
   The value fetched from OPTS is released on every path, matching
   copyIntOpt; previously it was never released, leaking a reference on
   each call that found the option.
   NOTE(review): releasing the value right after tidyOptSetValue assumes
   libtidy copies the string it is given -- confirm against the libtidy
   API documentation. */
static U8_MAYBE_UNUSED int copyStringOpt(lispval opts,
                                         TidyDoc tdoc,
                                         TidyOptionId optname,
                                         u8_string optstring,
                                         u8_string dflt)
{
  int rc = -1;
  lispval value = getoption(opts,optstring,KNO_VOID);
  if (KNO_VOIDP(value))
    rc = tidyOptSetValue(tdoc,optname,dflt);
  else if (KNO_STRINGP(value))
    rc = tidyOptSetValue(tdoc,optname,KNO_CSTRING(value));
  else {
    /* The error keeps its own reference to the offending value. */
    kno_incref(value);
    kno_seterr(kno_TypeError,"copyStringOpt(tidy)","string",value);
    rc = -1;}
  kno_decref(value);
  return rc;
}
/* Tests the option SYM in OPTS as a C truth value.
   Returns DFLT when the option is absent (kno_getopt yields void),
   0 when it is the false value, and 1 for anything else (releasing the
   fetched value first). */
static U8_MAYBE_UNUSED int testopt(lispval opts,lispval sym,int dflt)
{
  lispval found = kno_getopt(opts,sym,KNO_VOID);
  if (KNO_VOIDP(found))
    return dflt;
  if (KNO_FALSEP(found))
    return 0;
  kno_decref(found);
  return 1;
}
/* The main primitive */
/* Option-name symbols, interned once in kno_init_tidy().
   NOTE(review): xhtml_symbol is assigned there but is not read by any
   code visible in this file. */
static lispval doctype_symbol, dontfix_symbol, wrap_symbol, xhtml_symbol;
/* Shared worker behind the tidy primitives.

   STRING   lisp string holding the markup to parse (read via KNO_CSTRING).
   OPTS     option source consulted through getoption/kno_getopt.
   DIAG     when neither void nor false and the final status is non-zero,
            tidy diagnostics are run; an applicable DIAG is then called
            with the error-buffer text, otherwise the text is logged.
   DO_FIXES supplies the default (true/false) for the WARN, BARE, CLEAN,
            DROPEMPTY and ENCLOSEBODY options.
   XHTML    non-zero configures XHTMLOUT/XMLSPACE, zero configures HTMLOUT.
            Negative values are non-zero and so take the XHTML branch.

   Returns the tidied document as a new lisp string, an error value from
   kno_err, or the aborting result of calling DIAG.

   Changes relative to the earlier version:
     - the values fetched for `wrap`, the indentation setting, and
       `dontfix`, plus the string passed to DIAG, are now released;
     - the indentation setting is read from the `indent` option instead
       of (by copy-paste) the `doctype` option;
     - the early-exit path frees both buffers like the normal exit path;
     - a NULL buffer pointer is never handed to kno_mkstring.

   NOTE(review): the tidyOptSet* calls look like they return a Bool (see
   the TidyForceOutput check below, which treats a truthy result as
   success), so the `rc>=0` guards in the option chain probably never
   observe a failure -- confirm against the libtidy documentation before
   tightening them. */
static lispval tidy_prim_helper(lispval string,lispval opts,lispval diag,
                                int do_fixes,int xhtml)
{
  lispval result = KNO_VOID;
  TidyBuffer outbuf={NULL};
  TidyBuffer errbuf={NULL};
  int rc = -1;
  TidyDoc tdoc = tidyCreate();
  lispval for_real = ((do_fixes)?(KNO_TRUE):(KNO_FALSE));
  tidyBufInit(&outbuf);
  tidyBufInit(&errbuf);
  rc = tidySetErrorBuffer(tdoc,&errbuf);
  if (rc<0) {
    tidyBufFree(&outbuf);
    tidyBufFree(&errbuf);
    tidyRelease(tdoc);
    return kno_err(kno_TidyError,"tidy_prim/init",NULL,KNO_VOID);}

  /* Output flavour */
  if (xhtml) {
    rc = copyBoolOpt(opts,tdoc,TidyXhtmlOut,"XHTMLOUT",KNO_TRUE);
    if (rc>=0) rc = copyBoolOpt(opts,tdoc,TidyXmlSpace,"XMLSPACE",KNO_TRUE);}
  else rc = copyBoolOpt(opts,tdoc,TidyHtmlOut,"HTMLOUT",KNO_FALSE);

  /* Simple options copied straight from OPTS, each with a default */
  if (rc>=0) rc = copyStringOpt(opts,tdoc,TidyCharEncoding,"ENCODING","utf8");
  /* NOTE(review): "utf8" as the default alt text looks like a copy-paste
     from the ENCODING line above -- confirm the intended default. */
  if (rc>=0) rc = copyStringOpt(opts,tdoc,TidyAltText,"ALTSTRING","utf8");
  if (rc>=0) rc = copyBoolOpt(opts,tdoc,TidyShowWarnings,"WARN",for_real);
  if (rc>=0) rc = copyBoolOpt(opts,tdoc,TidyQuiet,"QUIET",KNO_TRUE);
  if (rc>=0) rc = copyBoolOpt(opts,tdoc,TidyMakeBare,"BARE",for_real);
  if (rc>=0) rc = copyBoolOpt(opts,tdoc,TidyMakeClean,"CLEAN",for_real);
  if (rc>=0) rc = copyBoolOpt
               (opts,tdoc,TidyDropEmptyParas,"DROPEMPTY",for_real);
  if (rc>=0) rc = copyBoolOpt
               (opts,tdoc,TidyFixComments,"FIXCOMMENTS",KNO_TRUE);
  if (rc>=0) rc = copyBoolOpt(opts,tdoc,TidyXmlDecl,"XMLDECL",KNO_FALSE);
  if (rc>=0) rc = copyBoolOpt(opts,tdoc,TidyEncloseBodyText,"ENCLOSEBODY",
                              for_real);
  if (rc>=0) rc = copyBoolOpt(opts,tdoc,TidyEncloseBlockText,"ENCLOSEBLOCK",
                              KNO_FALSE);
  if (rc>=0) rc = copyBoolOpt(opts,tdoc,TidyWord2000,"FIXWORD2000",KNO_TRUE);
  if (rc>=0) rc = copyBoolOpt(opts,tdoc,TidyMark,"LEAVEMARK",KNO_FALSE);
  if (rc>=0) rc = copyBoolOpt(opts,tdoc,TidyJoinClasses,"JOINCLASSES",KNO_TRUE);
  if (rc>=0) rc = copyBoolOpt(opts,tdoc,TidyJoinStyles,"JOINSTYLES",KNO_TRUE);
  if (rc>=0) rc = copyBoolOpt(opts,tdoc,TidyFixUri,"FIXURI",KNO_TRUE);
  if (rc>=0) rc = copyBoolOpt(opts,tdoc,TidyNumEntities,"NUMENTITIES",KNO_TRUE);
  /* NOTE(review): TidyNCR is driven by the same "NUMENTITIES" option as
     the line above; possibly intentional, possibly a copy-paste. */
  if (rc>=0) rc = copyBoolOpt(opts,tdoc,TidyNCR,"NUMENTITIES",KNO_TRUE);
  if (rc>=0) rc = copyStringOpt(opts,tdoc,TidyCSSPrefix,"CSSPREFIX","tidy-");
  if (rc>=0) rc = copyBoolOpt(opts,tdoc,TidyQuoteAmpersand,"QUOTEAMP",KNO_TRUE);
  if (rc>=0) rc = copyBoolOpt(opts,tdoc,TidyQuoteNbsp,"QUOTENBSP",KNO_FALSE);
  if (rc>=0) rc = copyBoolOpt(opts,tdoc,TidyIndentAttributes,
                              "INDENTATTRIBS",KNO_TRUE);
  if (rc>=0) rc = copyIntOpt(opts,tdoc,TidyIndentSpaces,"INDENTATION",2);
  if (rc>=0) rc = copyIntOpt(opts,tdoc,TidyTabSize,"TABSIZE",5);
  /*
    if (rc>=0) rc = copyBoolOpt(opts,tdoc,TidyMergeDivs,"MERGEDIVS",KNO_FALSE);
    if (rc>=0) rc = copyBoolOpt(opts,tdoc,TidyMergeSpans,"MERGESPANS",KNO_FALSE);
  */

  /* Wrapping: an integer gives the wrap length, any other non-false
     value selects 80 columns, false/absent leaves tidy's setting alone. */
  if (rc>=0) {
    lispval wrap = kno_getopt(opts,wrap_symbol,KNO_VOID);
    if (KNO_INTP(wrap))
      rc = tidyOptSetInt(tdoc,TidyWrapLen,KNO_FIX2INT(wrap));
    else if (!((KNO_FALSEP(wrap))||(KNO_VOIDP(wrap))))
      rc = tidyOptSetInt(tdoc,TidyWrapLen,80);
    else {}
    kno_decref(wrap);}

  /* Indentation: false disables the "auto" indent setting, an integer
     also sets the indent width, anything else (including absent) just
     selects "auto". */
  if (rc>=0) {
    lispval indent = kno_getopt(opts,kno_intern("indent"),KNO_VOID);
    if (KNO_FALSEP(indent)) {}
    else if (KNO_INTP(indent)) {
      rc = tidyOptSetValue(tdoc,TidyIndentContent,"auto");
      if (rc>=0)
        rc = tidyOptSetInt(tdoc,TidyIndentSpaces,KNO_FIX2INT(indent));}
    else rc = tidyOptSetValue(tdoc,TidyIndentContent,"auto");
    kno_decref(indent);}

  /* Doctype: absent or false lets tidy choose; a string is used
     verbatim; any other value selects the HTML5 doctype.  The results
     of these calls are not folded into rc. */
  if (rc>=0) {
    lispval doctype = kno_getopt(opts,doctype_symbol,KNO_VOID);
    if (KNO_VOIDP(doctype))
      tidyOptSetInt(tdoc,TidyDoctypeMode,TidyDoctypeAuto);
    else if (KNO_FALSEP(doctype))
      tidyOptSetInt(tdoc,TidyDoctypeMode,TidyDoctypeAuto);
    else if (!(KNO_STRINGP(doctype))) {
      tidyOptSetInt(tdoc,TidyDoctypeMode,TidyDoctypeUser);
      tidyOptSetValue(tdoc,TidyDoctype,"<!DOCTYPE html>");}
    else {
      tidyOptSetInt(tdoc,TidyDoctypeMode,TidyDoctypeUser);
      tidyOptSetValue(tdoc,TidyDoctype,KNO_CSTRING(doctype));}
    kno_decref(doctype);}

  if (rc<0) result = kno_err(kno_TidyError,"tidy_prim/setopts",errbuf.bp,KNO_VOID);
  else {
    lispval dontfix = kno_getopt(opts,dontfix_symbol,KNO_FALSE);
    /* Parse, then clean/repair unless the `dontfix` option is set. */
    rc = tidyParseString(tdoc,KNO_CSTRING(string));
    if (rc<0)
      result = kno_err(kno_TidyError,"tidy_prim/parse",errbuf.bp,KNO_VOID);
    else if (KNO_FALSEP(dontfix))
      rc = tidyCleanAndRepair(tdoc);
    else {}
    kno_decref(dontfix);
    /* Each later stage is skipped once RESULT holds an error. */
    if (!(KNO_VOIDP(result))) {}
    else if (rc<0)
      result = kno_err(kno_TidyError,"tidy_prim/clean",errbuf.bp,KNO_VOID);
    else rc = ((tidyOptSetBool(tdoc,TidyForceOutput,yes))?(rc):(-1));
    if (!(KNO_VOIDP(result))) {}
    else if (rc<0)
      result = kno_err(kno_TidyError,"tidy_prim/forceout",errbuf.bp,KNO_VOID);
    else rc = tidySaveBuffer(tdoc,&outbuf);
    if (!(KNO_VOIDP(result))) {}
    else if (rc<0)
      result = kno_err(kno_TidyError,"tidy_prim/output",errbuf.bp,KNO_VOID);
    else if (outbuf.bp)
      result = kno_mkstring(outbuf.bp);
    else result = kno_mkstring("");
    /* Diagnostics are only produced for a non-zero status. */
    if ((!((KNO_VOIDP(diag))||(KNO_FALSEP(diag))))&&((rc>0)||(rc<0))) {
      int drc = tidyRunDiagnostics(tdoc);
      if (drc<0) u8_log(LOG_CRIT,"TIDY/diagfail","%s",errbuf.bp);
      else if (KNO_APPLICABLEP(diag)) {
        lispval arg = (errbuf.bp) ?
          (kno_mkstring(errbuf.bp)) : (kno_mkstring(""));
        lispval dresult = kno_apply(diag,1,&arg);
        kno_decref(arg);
        if (KNO_ABORTP(dresult)) {
          /* An aborting callback replaces whatever we had produced. */
          kno_decref(result); result = dresult;}
        else kno_decref(dresult);}
      else u8_log(LOG_WARNING,"TIDY","%s",errbuf.bp);}}
  tidyBufFree(&outbuf);
  tidyBufFree(&errbuf);
  tidyRelease(tdoc);
  return result;
}
/* `tidy5`: runs tidy_prim_helper with do_fixes=1 and xhtml=-1.
   NOTE(review): the helper tests its xhtml argument with `if (xhtml)`,
   so -1 selects the XHTML option branch; confirm whether -1 was meant
   to be an "unspecified" marker.
   The documentation string's "agument" typo is corrected here. */
DEFC_PRIM("tidy5",tidy_prim,
          KNO_MAX_ARGS(3)|KNO_MIN_ARGS(1),
          "Cleans up HTML text in its argument",
          {"string",kno_string_type,KNO_VOID},
          {"opts",kno_any_type,KNO_VOID},
          {"diag",kno_any_type,KNO_VOID})
static lispval tidy_prim(lispval string,lispval opts,lispval diag)
{
  return tidy_prim_helper(string,opts,diag,1,-1);
}
/* `tidy->indent`: delegates to tidy_prim_helper with fixes turned off
   (do_fixes=0) and the xhtml flag passed as -1. */
DEFC_PRIM("tidy->indent",tidy_indent_prim,
          KNO_MAX_ARGS(3)|KNO_MIN_ARGS(1),
          "Cleans up HTML text, indenting the result",
          {"string",kno_string_type,KNO_VOID},
          {"opts",kno_any_type,KNO_VOID},
          {"diag",kno_any_type,KNO_VOID})
static lispval tidy_indent_prim(lispval string,lispval opts,lispval diag)
{
  const int fix_flag = 0;
  const int xhtml_flag = -1;
  return tidy_prim_helper(string,opts,diag,fix_flag,xhtml_flag);
}
/* `tidy->html`: delegates to tidy_prim_helper with fixes enabled
   (do_fixes=1) and the xhtml flag cleared (0). */
DEFC_PRIM("tidy->html",tidy_html_prim,
          KNO_MAX_ARGS(3)|KNO_MIN_ARGS(1),
          "Cleans up HTML text, generates HTML5 (not XHTML)",
          {"string",kno_string_type,KNO_VOID},
          {"opts",kno_any_type,KNO_VOID},
          {"diag",kno_any_type,KNO_VOID})
static lispval tidy_html_prim(lispval string,lispval opts,lispval diag)
{
  const int fix_flag = 1;
  const int xhtml_flag = 0;
  return tidy_prim_helper(string,opts,diag,fix_flag,xhtml_flag);
}
/* `tidy->xhtml`: delegates to tidy_prim_helper with fixes enabled
   (do_fixes=1) and the xhtml flag set (1). */
DEFC_PRIM("tidy->xhtml",tidy_xhtml_prim,
          KNO_MAX_ARGS(3)|KNO_MIN_ARGS(1),
          "Cleans up HTML text, generating XHTML",
          {"string",kno_string_type,KNO_VOID},
          {"opts",kno_any_type,KNO_VOID},
          {"diag",kno_any_type,KNO_VOID})
static lispval tidy_xhtml_prim(lispval string,lispval opts,lispval diag)
{
  const int fix_flag = 1;
  const int xhtml_flag = 1;
  return tidy_prim_helper(string,opts,diag,fix_flag,xhtml_flag);
}
/* The module object created by kno_init_tidy(). */
static lispval tidy_module;

/* Initializes the tidy module.
   Returns 0 without doing anything when initialization has already
   happened (tidy_init is non-zero); otherwise records the init time,
   interns the option symbols, creates the "tidy" module, links the
   primitives into it, registers this source file, and returns 1. */
KNO_EXPORT int kno_init_tidy(void)
{
  if (tidy_init != 0)
    return 0;
  tidy_init = u8_millitime();

  /* Option symbols used by tidy_prim_helper */
  doctype_symbol = kno_intern("doctype");
  dontfix_symbol = kno_intern("dontfix");
  xhtml_symbol = kno_intern("xhtml");
  wrap_symbol = kno_intern("wrap");

  /* Module creation and primitive registration */
  tidy_module = kno_new_cmodule("tidy",0,kno_init_tidy);
  link_local_cprims();
  kno_finish_module(tidy_module);

  u8_register_source_file(_FILEINFO);
  return 1;
}
/* Links the four primitives defined in this file into tidy_module.
   Called from kno_init_tidy() after the module has been created.
   Each KNO_LINK_CPRIM call names the primitive, its C function, the
   value 3 (matching KNO_MAX_ARGS(3) in the DEFC_PRIM declarations --
   presumably the arity; confirm against kno/cprims.h), and the module. */
static void link_local_cprims()
{
KNO_LINK_CPRIM("TIDY5",tidy_prim,3,tidy_module);
KNO_LINK_CPRIM("TIDY->XHTML",tidy_xhtml_prim,3,tidy_module);
KNO_LINK_CPRIM("TIDY->INDENT",tidy_indent_prim,3,tidy_module);
KNO_LINK_CPRIM("TIDY->HTML",tidy_html_prim,3,tidy_module);
}