Skip to content

Commit

Permalink
feat: Support xmp tag parsing (#1790)
Browse files — browse the repository at this point in the history
  • Loading branch information
nati-elmaliach authored Dec 23, 2024
1 parent 278e39b commit ecdb071
Show file tree
Hide file tree
Showing 3 changed files with 128 additions and 8 deletions.
9 changes: 9 additions & 0 deletions src/Tokenizer.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ describe("Tokenizer", () => {
it("for self-closing textarea tag", () => {
expect(tokenize("<textarea /><div></div>")).toMatchSnapshot();
});
it("for self-closing xmp tag", () => {
    // A self-closed <xmp /> is followed by an ordinary <div>; the snapshot
    // records the tokenizer events emitted for the whole input.
    const events = tokenize("<xmp /><div></div>");
    expect(events).toMatchSnapshot();
});
});

describe("should support standard special tags", () => {
Expand All @@ -54,6 +57,9 @@ describe("Tokenizer", () => {
tokenize("<textarea></textarea><div></div>"),
).toMatchSnapshot();
});
it("for normal xmp tag", () => {
    // An empty <xmp></xmp> pair followed by an ordinary <div>; the snapshot
    // records the tokenizer events emitted for the whole input.
    const events = tokenize("<xmp></xmp><div></div>");
    expect(events).toMatchSnapshot();
});
});

describe("should treat html inside special tags as text", () => {
Expand All @@ -71,6 +77,9 @@ describe("Tokenizer", () => {
tokenize("<textarea><div></div></textarea>"),
).toMatchSnapshot();
});
it("for div inside xmp tag", () => {
    // Markup nested inside <xmp> belongs to the "html inside special tags as
    // text" group: the inner <div></div> is expected to come back as text.
    const events = tokenize("<xmp><div></div></xmp>");
    expect(events).toMatchSnapshot();
});
});

describe("should correctly mark attributes", () => {
Expand Down
33 changes: 25 additions & 8 deletions src/Tokenizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ const Sequences = {
TextareaEnd: new Uint8Array([
0x3c, 0x2f, 0x74, 0x65, 0x78, 0x74, 0x61, 0x72, 0x65, 0x61,
]), // `</textarea`
XmpEnd: new Uint8Array([0x3c, 0x2f, 0x78, 0x6d, 0x70]), // `</xmp`
};

export default class Tokenizer {
Expand Down Expand Up @@ -391,7 +392,10 @@ export default class Tokenizer {
this.state = State.InTagName;
} else if (lower === Sequences.ScriptEnd[2]) {
this.state = State.BeforeSpecialS;
} else if (lower === Sequences.TitleEnd[2]) {
} else if (
lower === Sequences.TitleEnd[2] ||
lower === Sequences.XmpEnd[2]
) {
this.state = State.BeforeSpecialT;
} else {
this.state = State.InTagName;
Expand Down Expand Up @@ -593,13 +597,26 @@ export default class Tokenizer {

/**
 * Inspects the second letter of a tag name whose first letter was `t` or `x`
 * (the tag-name dispatch moves to `State.BeforeSpecialT` for both), and decides
 * whether the tag may be one of the special tags handled here.
 *
 * The `Sequences.*End` arrays hold the bytes of a closing tag (`</textarea`,
 * `</xmp`; `TitleEnd` is presumably `</title` — its definition is not shown
 * here), so index 3 is the second letter of the tag name.
 *
 * NOTE(review): only the second letter is checked, not which first letter led
 * here. A name starting `tm…` is therefore handed to the `XmpEnd` sequence and
 * one starting `xi…` / `xe…` to the title/textarea sequences. Confirm that
 * `startSpecial` and the matching that follows reject such names (e.g. `<tmp>`).
 *
 * @param c Character code of the current input character.
 */
private stateBeforeSpecialT(c: number): void {
    // OR-ing with 0x20 folds ASCII upper-case letters onto lower case, making
    // the comparison against the lower-case sequence bytes case-insensitive.
    const lower = c | 0x20;

    switch (lower) {
        case Sequences.TitleEnd[3]: {
            // The 4 is presumably the sequence index at which matching resumes
            // (`<`, `/` and two letters already accounted for) — TODO confirm.
            this.startSpecial(Sequences.TitleEnd, 4);

            break;
        }
        case Sequences.TextareaEnd[3]: {
            this.startSpecial(Sequences.TextareaEnd, 4);

            break;
        }
        case Sequences.XmpEnd[3]: {
            this.startSpecial(Sequences.XmpEnd, 4);

            break;
        }
        default: {
            // Not a candidate special tag: treat it as an ordinary tag name.
            this.state = State.InTagName;
            this.stateInTagName(c); // Consume the token again
        }
    }
}

Expand Down
94 changes: 94 additions & 0 deletions src/__snapshots__/Tokenizer.spec.ts.snap
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,37 @@ exports[`Tokenizer > should support self-closing special tags > for self-closing
]
`;

exports[`Tokenizer > should support self-closing special tags > for self-closing xmp tag 1`] = `
[
[
"onopentagname",
1,
4,
],
[
"onselfclosingtag",
6,
],
[
"onopentagname",
8,
11,
],
[
"onopentagend",
11,
],
[
"onclosetag",
14,
17,
],
[
"onend",
],
]
`;

exports[`Tokenizer > should support standard special tags > for normal script tag 1`] = `
[
[
Expand Down Expand Up @@ -631,6 +662,42 @@ exports[`Tokenizer > should support standard special tags > for normal textarea
]
`;

exports[`Tokenizer > should support standard special tags > for normal xmp tag 1`] = `
[
[
"onopentagname",
1,
4,
],
[
"onopentagend",
4,
],
[
"onclosetag",
7,
10,
],
[
"onopentagname",
12,
15,
],
[
"onopentagend",
15,
],
[
"onclosetag",
18,
21,
],
[
"onend",
],
]
`;

exports[`Tokenizer > should treat html inside special tags as text > for div inside script tag 1`] = `
[
[
Expand Down Expand Up @@ -738,3 +805,30 @@ exports[`Tokenizer > should treat html inside special tags as text > for div ins
],
]
`;

exports[`Tokenizer > should treat html inside special tags as text > for div inside xmp tag 1`] = `
[
[
"onopentagname",
1,
4,
],
[
"onopentagend",
4,
],
[
"ontext",
5,
16,
],
[
"onclosetag",
18,
21,
],
[
"onend",
],
]
`;

0 comments on commit ecdb071

Please sign in to comment.