Skip to content

Commit

Permalink
web-html: Finish implementing the IN_HEAD and IN_HEAD_NOSCRIPT parsin…
Browse files Browse the repository at this point in the history
…g states.
  • Loading branch information
sleepy-monax committed Mar 26, 2024
1 parent 33e96f0 commit 3f2097c
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 24 deletions.
2 changes: 1 addition & 1 deletion src/web/web-html/lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ void Lexer::_raise(Str msg) {
}

void Lexer::consume(Rune rune, bool isEof) {
logDebug("Lexing '{#c}' {#x} in {}", rune, rune, toStr(_state));
// logDebug("Lexing '{#c}' {#x} in {}", rune, rune, toStr(_state));

switch (_state) {

Expand Down
88 changes: 85 additions & 3 deletions src/web/web-html/parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,7 @@ void Parser::_handleBeforeHead(Token const &t) {
// A start tag whose tag name is "head"
else if (t.type == Token::START_TAG and t.name == "head") {
_headElement = insertHtmlElement(*this, t);
_switchTo(Mode::IN_HEAD);
}

// Anything else
Expand All @@ -428,6 +429,12 @@ void Parser::_handleBeforeHead(Token const &t) {

// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inhead
void Parser::_handleInHead(Token const &t) {
auto anythingElse = [&] {
_openElements.popBack();
_switchTo(Mode::AFTER_HEAD);
accept(t);
};

// A character token that is one of U+0009 CHARACTER TABULATION,
// U+000A LINE FEED (LF), U+000C FORM FEED (FF),
// U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
Expand Down Expand Up @@ -520,6 +527,82 @@ void Parser::_handleInHead(Token const &t) {

// 10. Switch the insertion mode to "text".
_switchTo(Mode::TEXT);
} else if (t.type == Token::END_TAG and (t.name == "head")) {
_openElements.popBack();
_switchTo(Mode::AFTER_HEAD);
} else if (t.type == Token::END_TAG and (t.name == "body" or t.name == "html" or t.name == "br")) {
anythingElse();
} else if (t.type == Token::START_TAG and (t.name == "template")) {
// NOSPEC: We don't support templates
} else if (t.type == Token::END_TAG and (t.name == "template")) {
// NOSPEC: We don't support templates
} else if ((t.type == Token::START_TAG and (t.name == "head")) or t.type == Token::END_TAG) {
// ignore
_raise();
} else {
anythingElse();
}
}

// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inheadnoscript
//
// Handles one token while the parser is in the "in head noscript" insertion
// mode. Depending on the token this either reports a parse error, delegates
// to the rules of another insertion mode through _acceptIn(), or pops the
// current entry of _openElements and switches back to Mode::IN_HEAD.
void Parser::_handleInHeadNoScript(Token const &t) {
    // Shared "anything else" fallback: report a parse error, pop the current
    // entry of the open-elements stack, go back to IN_HEAD and hand the same
    // token to accept() again.
    auto anythingElse = [&] {
        _raise();
        _openElements.popBack();
        _switchTo(Mode::IN_HEAD);
        accept(t);
    };

    // A DOCTYPE token
    if (t.type == Token::DOCTYPE) {
        // Parse error; the token is otherwise ignored.
        _raise();
    }

    // A start tag whose tag name is "html"
    else if (t.type == Token::START_TAG and (t.name == "html")) {
        _acceptIn(Mode::IN_BODY, t);
    }

    // An end tag whose tag name is "noscript"
    else if (t.type == Token::END_TAG and (t.name == "noscript")) {
        _openElements.popBack();
        _switchTo(Mode::IN_HEAD);
    }

    // A character token that is one of
    //  - U+0009 CHARACTER TABULATION,
    //  - U+000A LINE FEED (LF),
    //  - U+000C FORM FEED (FF),
    //  - U+000D CARRIAGE RETURN (CR),
    //  - or U+0020 SPACE
    // A comment token
    // A start tag whose tag name is one of: "basefont", "bgsound", "link", "meta", "noframes", "style"
    else if (
        (t.type == Token::CHARACTER and
         // '\r' is listed above and in the spec; without it a CR character
         // token would fall through to the "anything else" branch.
         (t.rune == '\t' or t.rune == '\n' or t.rune == '\f' or t.rune == '\r' or t.rune == ' ')) or
        t.type == Token::COMMENT or
        (t.type == Token::START_TAG and
         (t.name == "basefont" or t.name == "bgsound" or t.name == "link" or
          t.name == "meta" or t.name == "noframes" or t.name == "style"))
    ) {
        _acceptIn(Mode::IN_HEAD, t);
    }

    // An end tag whose tag name is "br"
    // NOTE: must be tested before the generic "any other end tag" case below.
    else if (t.type == Token::END_TAG and (t.name == "br")) {
        anythingElse();
    }

    // A start tag whose tag name is one of: "head", "noscript"
    // Any other end tag
    else if (
        (t.type == Token::START_TAG and (t.name == "head" or t.name == "noscript")) or
        t.type == Token::END_TAG
    ) {
        // Parse error; ignore the token.
        _raise();
    }

    // Anything else
    else {
        anythingElse();
    }
}

Expand Down Expand Up @@ -548,10 +631,9 @@ void Parser::_acceptIn(Mode mode, Token const &t) {
_handleInHead(t);
break;

// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inheadnoscript
case Mode::IN_HEAD_NOSCRIPT: {
case Mode::IN_HEAD_NOSCRIPT:
_handleInHeadNoScript(t);
break;
}

// https://html.spec.whatwg.org/multipage/parsing.html#the-after-head-insertion-mode
case Mode::AFTER_HEAD: {
Expand Down
2 changes: 2 additions & 0 deletions src/web/web-html/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ struct Parser : public Sink {

void _handleInHead(Token const &t);

void _handleInHeadNoScript(Token const &t);

void _switchTo(Mode mode);

void _acceptIn(Mode mode, Token const &t);
Expand Down
5 changes: 3 additions & 2 deletions src/web/web-http/fetch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,12 @@ Async::Task<usize> _fetch(Mime::Url const &url, Sys::_Connection &conn, Io::Writ
"GET {} HTTP/1.1\r\n"
"Host: {}\r\n"
"Connection: close\r\n"
"User-Agent: Karm Web/" stringify$(__ck_version_value) "\r\n"
"\r\n",
"User-Agent: Karm Web Fetch/" stringify$(__ck_version_value) "\r\n"
"\r\n",
url.path,
url.host
));

co_try$(conn.write(req.bytes()));

// Read response
Expand Down
33 changes: 15 additions & 18 deletions src/web/web-json/parse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,8 @@ Res<String> parseStr(Io::SScan &s) {
}
}

if (s.next()) {
if (s.next())
continue;
}

return Error::invalidData("invalid string");
}
Expand All @@ -76,9 +75,8 @@ Res<Object> parseObject(Io::SScan &s) {
auto key = try$(parseStr(s));

s.eat(Re::space());
if (not s.skip(':')) {
if (not s.skip(':'))
return Error::invalidData("expected ':'");
}

s.eat(Re::space());

Expand All @@ -87,24 +85,22 @@ Res<Object> parseObject(Io::SScan &s) {

s.eat(Re::space());

if (s.skip('}')) {
if (s.skip('}'))
return Ok(m);
}
if (not s.skip(',')) {

if (not s.skip(','))
return Error::invalidData("expected ','");
}
}
}

Res<Array> parseArray(Io::SScan &s) {
Array v;
if (not s.skip('[')) {
if (not s.skip('['))
return Error::invalidData("expected '['");
}

if (s.skip(']')) {
if (s.skip(']'))
return Ok(v);
}

while (true) {
s.eat(Re::space());

Expand All @@ -113,12 +109,11 @@ Res<Array> parseArray(Io::SScan &s) {

s.eat(Re::space());

if (s.skip(']')) {
if (s.skip(']'))
return Ok(v);
}
if (not s.skip(',')) {

if (not s.skip(','))
return Error::invalidData("expected ','");
}
}
}

Expand All @@ -136,6 +131,7 @@ Res<usize> parseDigits(Io::SScan &s) {
s.next();
++digits;
}

return Ok(digits);
}

Expand All @@ -152,6 +148,7 @@ Res<f64> parseDecimal(Io::SScan &s) {
fpart += parseAsciiDecDigit(s.next()) * multiplier;
multiplier /= 10;
}

return Ok(fpart);
}

Expand All @@ -174,15 +171,15 @@ Res<isize> parseInteger(Io::SScan &s) {
while (s.match(Re::digit()) != Match::NO) {
ipart = ipart * 10 + parseAsciiDecDigit(s.next());
}

return Ok(sign ? -ipart : ipart);
}

Res<Value> parseNumber(Io::SScan &s) {
isize ipart = try$(parseInteger(s));

if (s.match(Re::single('.', 'e', 'E')) == Match::NO) {
if (s.match(Re::single('.', 'e', 'E')) == Match::NO)
return Ok<Value>(ipart);
}

// NOTE: Floating point numbers are not supported in freestanding environments.
#ifdef __ck_freestanding__
Expand Down

0 comments on commit 3f2097c

Please sign in to comment.