Skip to content

Commit

Permalink
Use strcspn() to optimize dom_html5_escape_string() (#12948)
Browse files Browse the repository at this point in the history
* Use strcspn() to optimize dom_html5_escape_string()

This routine, implemented by libc, uses a faster algorithm than the old
naive byte-by-byte approach here. It is also often optimized using
SIMD.

* Calculate mask outside of loop
  • Loading branch information
nielsdos authored Dec 16, 2023
1 parent 82baeeb commit 0870da3
Showing 1 changed file with 20 additions and 16 deletions.
36 changes: 20 additions & 16 deletions ext/dom/html5_serializer.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,17 @@ static zend_result dom_html5_escape_string(dom_html5_serialize_context *ctx, con
{
const char *last_output = content;

while (*content != '\0') {
/* Note: uses UTF-8 internally, so <C2 A0> indicates a non-breaking space */
const char *mask = attribute_mode ? "&\xC2\"" : "&\xC2<>";

while (true) {
size_t chunk_length = strcspn(content, mask);

content += chunk_length;
if (*content == '\0') {
break;
}

switch (*content) {
/* Step 1 */
case '&': {
Expand All @@ -93,29 +103,23 @@ static zend_result dom_html5_escape_string(dom_html5_serialize_context *ctx, con

/* Step 3 */
case '"': {
if (attribute_mode) {
TRY(ctx->write_string_len(ctx->application_data, last_output, content - last_output));
TRY(ctx->write_string_len(ctx->application_data, "&quot;", strlen("&quot;")));
last_output = content + 1;
}
TRY(ctx->write_string_len(ctx->application_data, last_output, content - last_output));
TRY(ctx->write_string_len(ctx->application_data, "&quot;", strlen("&quot;")));
last_output = content + 1;
break;
}

/* Step 4 */
case '<': {
if (!attribute_mode) {
TRY(ctx->write_string_len(ctx->application_data, last_output, content - last_output));
TRY(ctx->write_string_len(ctx->application_data, "&lt;", strlen("&lt;")));
last_output = content + 1;
}
TRY(ctx->write_string_len(ctx->application_data, last_output, content - last_output));
TRY(ctx->write_string_len(ctx->application_data, "&lt;", strlen("&lt;")));
last_output = content + 1;
break;
}
case '>': {
if (!attribute_mode) {
TRY(ctx->write_string_len(ctx->application_data, last_output, content - last_output));
TRY(ctx->write_string_len(ctx->application_data, "&gt;", strlen("&gt;")));
last_output = content + 1;
}
TRY(ctx->write_string_len(ctx->application_data, last_output, content - last_output));
TRY(ctx->write_string_len(ctx->application_data, "&gt;", strlen("&gt;")));
last_output = content + 1;
break;
}
}
Expand Down

0 comments on commit 0870da3

Please sign in to comment.