From adb717a02b486dd2075ae6f009f27dfa7804bc4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Koz=C5=82owski?= Date: Wed, 4 Oct 2023 04:42:39 +0200 Subject: [PATCH] wip fun --- .../smithyql/parser/v2/p.worksheet.sc | 200 ++++++++++++------ .../smithyql/parser/v2/parser.scala | 156 ++++++++------ 2 files changed, 236 insertions(+), 120 deletions(-) diff --git a/modules/parser/src/main/scala/playground/smithyql/parser/v2/p.worksheet.sc b/modules/parser/src/main/scala/playground/smithyql/parser/v2/p.worksheet.sc index ead2bf96..ab32623b 100644 --- a/modules/parser/src/main/scala/playground/smithyql/parser/v2/p.worksheet.sc +++ b/modules/parser/src/main/scala/playground/smithyql/parser/v2/p.worksheet.sc @@ -1,7 +1,81 @@ -import cats.implicits._ import playground.smithyql.parser.v2._ +import playground.smithyql.parser.v2.scanner.TokenKind.KW_SERVICE +import playground.smithyql.parser.v2.scanner.TokenKind.KW_USE import playground.smithyql.parser.v2.scanner._ +// sourceFile = decl* +// decl = useDecl | statement +// useDecl = "use" "service" fqn +// statement = query +// query = queryOperationName struct +// queryOperationName = (qualifiedIdent ".")? operationName +// operationName = ident +// qualifiedIdent = fqn +// fqn = namespace # ident +// namespace = ident ("." ident)* +// struct = "{" fields? "}" +// fields = field ("," field)* ("," | ) +// field = ident ":" node +// node = struct | list | literal +// list = "[" nodes? "]" +// nodes = node ("," node)* ("," | ) + +def parseUseDecl( + state: Parser +): GreenNode = { + import state.tokens + + val builder = GreenNode.builder(SyntaxKind.UseDecl) + + tokens.eatErrorsUntilNewlineOr(KW_USE, e => builder.addChild(GreenNode.error(e))) + + tokens.peek().kind match { + case KW_USE => + // all good, continue + builder.addChild(tokens.bump()) + case _ => + // USE was missing. + state.addError(Error.MisingToken(TokenKind.KW_USE)) + } + + tokens.eatErrorsUntilNewlineOr(TokenKind.KW_SERVICE, e => builder.addChild(GreenNode.error(e))) + + tokens.peek().kind match { + case KW_SERVICE => + // all good, continue + builder.addChild(tokens.bump()) + case _ => + // SERVICE was missing. + state.addError(Error.MisingToken(TokenKind.KW_SERVICE)) + } + + // we've gone past the need for keywords, time to eat a FQN + builder.addChild(parseFQN(state)) + + builder.build() +} + +def parseDecl( + state: Parser +): GreenNode = { + import state.tokens + val builder = GreenNode.builder(SyntaxKind.Decl) + tokens.peek().kind match { + case TokenKind.KW_USE => builder.addChild(parseUseDecl(state)) + // case _ => builder.addChild(parseStatement(state)) + } + builder.build() +} + +def parseSourceFile( + state: Parser +): GreenNode = { + val builder = GreenNode.builder(SyntaxKind.SourceFile) + while (!state.tokens.eof) + builder.addChild(parseDecl(state)) + builder.build() +} + def parseIdent( state: Parser ): GreenNode = { @@ -22,80 +96,86 @@ def parseNamespace( var done = false - while (!tokens.eof && !done) - tokens.peek().kind match { - case TokenKind.IDENT => - // todo: after an ident, expect dot or hash (some sort of state machine / another method in the recursive descent?) - // if it's an ident, report an error but don't wrap in ERROR - // otherwise, wrap in ERROR - builder.addChild(parseIdent(state)): Unit - - case TokenKind.DOT => - // swallow token - builder.addChild(tokens.bump()): Unit - - case TokenKind.HASH => done = true // end of namespace, move on - - case _ => - // skip extra/invalid tokens. we will report these in the future - builder.addChild(GreenNode.error(tokens.bump())) - tokens.bump(): Unit + while (!tokens.eofOrNewline && !done) + checkedLoop(tokens.id) { + tokens.peek().kind match { + case TokenKind.HASH => + // end of namespace, move on. + // Could be that the namespace is empty, technically an error, should we report? + done = true + + case TokenKind.IDENT => // will be captured in the next match + case _ => + // this is an error, unless it's whitespace. + builder.addChild(GreenNode.error(tokens.bump())) + } + + if (!done) { + // we have an ident, so parse it + builder.addChild(parseIdent(state)) + // look for a dot... or hash + tokens.eatErrorsUntilNewlineOr0( + List(TokenKind.DOT, TokenKind.HASH), + e => builder.addChild(GreenNode.error(e)), + ) + + if (!tokens.eof) { + if (tokens.peek().kind == TokenKind.HASH) { + done = true + } else if (tokens.peek().kind == TokenKind.DOT) { + builder.addChild(tokens.bump()) + } else { + // no dot, report an error but continue (maybe there's a hash ahead, for the next iteration) + state.addError(Error.MisingToken(TokenKind.DOT)) + } + } + + } else { + // we don't have an ident, so report an error + state.addError(Error.MisingToken(TokenKind.IDENT)) + } } builder.build() } +def checkedLoop[A]( + check: => A +)( + loop: => Unit +): Unit = { + val start = check + loop + val end = check + if (start == end) + sys.error("loop did not advance!") +} + def parseFQN( state: Parser ): GreenNode = { import state.tokens val builder = GreenNode.builder(SyntaxKind.FQN) + builder.addChild(parseNamespace(state)) + + tokens.eatErrorsUntilNewlineOr(TokenKind.HASH, e => builder.addChild(GreenNode.error(e))) + if (tokens.peek().kind == TokenKind.HASH) { builder.addChild(tokens.bump()) + } else { + state.addError(Error.MisingToken(TokenKind.HASH)) } + + // the rest of the line should be an ident builder.addChild(parseIdent(state)) + builder.build() } -SyntaxNode - .newRoot(parseIdent(Parser.init(TokenKind.IDENT("hello") :: Nil))) - .cast[Identifier] - .get - .value - -parseIdent(Parser.init(TokenKind.IDENT("hello") :: TokenKind.IDENT("world") :: Nil)) - -parseNamespace(Parser.init(Nil)) -parseNamespace(Parser.init(TokenKind.IDENT("hello") :: Nil)) - -SyntaxNode - .newRoot(parseNamespace(Parser.init(Scanner.scan("com.kubukoz.world")))) - .cast[Namespace] - .get - .parts - .map(_.value) - -val fqn = SyntaxNode.newRoot(parseFQN(Parser.fromString("com.kubukoz#foo"))).cast[FQN] -fqn.get.namespace.get.parts.map(_.value.get) -fqn.get.name.get.value.get - -//todo: this should have all tokens, even extraneous ones. Should render to the string above. -parseFQN(Parser.fromString("co111m.kub1ukoz#shrek_blob---,_,r")).allTokens.foldMap(_.text) - -parseFQN(Parser.fromString("co111m.kub1ukoz#shrek_blob---,_,r")) -parseFQN(Parser.fromString("co111m.kub1ukoz#shrek_blob---,_,r")).print - -val text = "com.kubukoz#helloworld" -pprint.pprintln(Scanner.scan(text)) -pprint.pprintln(parseFQN(Parser.fromString(text))) -pprint.pprintln(SyntaxNode.newRoot(parseFQN(Parser.fromString(text)))) -// pprint.pprintln(SyntaxNode.newRoot(parseFQN(Parser.fromString(text))).children) -println(SyntaxNode.newRoot(parseFQN(Parser.fromString(text))).print) -println( - SyntaxNode - .newRoot(parseFQN(Parser.fromString(text))) - .findAt("com.kubukoz#h".length) - .get - .pathTo -) +// val p = Parser.fromString("use service foo.bar#baz") +val p = Parser.fromString("foo.bar#baz") +SyntaxNode.newRoot(parseFQN(p)).print + +p.tokens.eof +p.errors diff --git a/modules/parser/src/main/scala/playground/smithyql/parser/v2/parser.scala b/modules/parser/src/main/scala/playground/smithyql/parser/v2/parser.scala index 52700d5b..bc8311e3 100644 --- a/modules/parser/src/main/scala/playground/smithyql/parser/v2/parser.scala +++ b/modules/parser/src/main/scala/playground/smithyql/parser/v2/parser.scala @@ -83,9 +83,9 @@ case class SyntaxNode( green: Either[GreenNode, Token], ) { - def cast[A]( - implicit mirror: AstNodeMirror[A] - ): Option[A] = mirror.cast(this) +// // def cast[A]( +// // implicit mirror: AstNodeMirror[A] +// // ): Option[A] = mirror.cast(this) def width = green.fold(_.width, _.width) @@ -163,100 +163,108 @@ object SyntaxNode { sealed trait SyntaxKind extends Product with Serializable object SyntaxKind { - case object File extends SyntaxKind + case object SourceFile extends SyntaxKind + case object Decl extends SyntaxKind + case object UseDecl extends SyntaxKind case object FQN extends SyntaxKind case object Namespace extends SyntaxKind case object Identifier extends SyntaxKind case object ERROR extends SyntaxKind } -trait AstNode[Self] { self: Product => - def syntax: SyntaxNode +// trait AstNode[Self] { self: Product => +// def syntax: SyntaxNode - def firstChildToken( - kind: TokenKind - ): Option[Token] = syntax.children.collectFirst { - case SyntaxNode(_, _, Right(tok @ Token(`kind`, _))) => tok - } +// def firstChildToken( +// kind: TokenKind +// ): Option[Token] = syntax.children.collectFirst { +// case SyntaxNode(_, _, Right(tok @ Token(`kind`, _))) => tok +// } - def allChildNodes[N: AstNodeMirror]: List[N] = syntax.children.mapFilter(_.cast[N]) +// def allChildNodes[N: AstNodeMirror]: List[N] = syntax.children.mapFilter(_.cast[N]) - def firstChildNode[N: AstNodeMirror]: Option[N] = syntax.children.collectFirstSome(_.cast[N]) +// def firstChildNode[N: AstNodeMirror]: Option[N] = syntax.children.collectFirstSome(_.cast[N]) -} +// } -trait AstNodeMirror[Self] { +// trait AstNodeMirror[Self] { - def cast( - node: SyntaxNode - ): Option[Self] +// def cast( +// node: SyntaxNode +// ): Option[Self] -} +// } -object AstNodeMirror { +// object AstNodeMirror { - def instance[T]( - matchingSyntaxKind: SyntaxKind - )( - make: SyntaxNode => T - ): AstNodeMirror[T] = - node => - node.green.left.map(_.kind) match { - case Left(`matchingSyntaxKind`) => Some(make(node)) - case _ => None - } +// def instance[T]( +// matchingSyntaxKind: SyntaxKind +// )( +// make: SyntaxNode => T +// ): AstNodeMirror[T] = +// node => +// node.green.left.map(_.kind) match { +// case Left(`matchingSyntaxKind`) => Some(make(node)) +// case _ => None +// } -} +// } // concrete -case class Identifier( - syntax: SyntaxNode -) extends AstNode[Identifier] { - def value: Option[Token] = firstChildToken(TokenKind.IDENT) -} +// case class Identifier( +// syntax: SyntaxNode +// ) extends AstNode[Identifier] { +// def value: Option[Token] = firstChildToken(TokenKind.IDENT) +// } -object Identifier { +// object Identifier { - implicit val node: AstNodeMirror[Identifier] = - AstNodeMirror.instance(SyntaxKind.Identifier)(apply) +// implicit val node: AstNodeMirror[Identifier] = +// AstNodeMirror.instance(SyntaxKind.Identifier)(apply) -} +// } -case class Namespace( - syntax: SyntaxNode -) extends AstNode[Namespace] { - def parts: List[Identifier] = allChildNodes[Identifier] -} +// case class Namespace( +// syntax: SyntaxNode +// ) extends AstNode[Namespace] { +// def parts: List[Identifier] = allChildNodes[Identifier] +// } -object Namespace { +// object Namespace { - implicit val node: AstNodeMirror[Namespace] = AstNodeMirror.instance(SyntaxKind.Namespace)(apply) +// implicit val node: AstNodeMirror[Namespace] = AstNodeMirror.instance(SyntaxKind.Namespace)(apply) -} +// } -case class FQN( - syntax: SyntaxNode -) extends AstNode[FQN] { - def namespace: Option[Namespace] = firstChildNode[Namespace] - def name: Option[Identifier] = firstChildNode[Identifier] -} +// case class FQN( +// syntax: SyntaxNode +// ) extends AstNode[FQN] { +// def namespace: Option[Namespace] = firstChildNode[Namespace] +// def name: Option[Identifier] = firstChildNode[Identifier] +// } -object FQN { +// object FQN { - implicit val node: AstNodeMirror[FQN] = AstNodeMirror.instance(SyntaxKind.FQN)(apply) +// implicit val node: AstNodeMirror[FQN] = AstNodeMirror.instance(SyntaxKind.FQN)(apply) -} +// } case class Tokens( private var all: List[Token], private var cursor: Int, ) { + def id: Int = cursor def eof: Boolean = cursor >= all.length + def eofOrNewline: Boolean = cursor >= all.length || peek().kind == TokenKind.NEWLINE def peek( - ): Token = all(cursor) + ): Token = + try all(cursor) + catch { + case _: IndexOutOfBoundsException => sys.error("peeked into EOF!") + } def bump( ): Token = { @@ -265,6 +273,28 @@ case class Tokens( result } + // def eatUntilNewlineOr( + // tok: TokenKind + // ): List[Token] = { + // val result = all.takeWhile(t => t.kind != TokenKind.NEWLINE && t.kind != tok) + // all = all.drop(result.length) + // result + // } + + def eatErrorsUntilNewlineOr( + tok: TokenKind, + err: Token => Unit, + ): Unit = eatErrorsUntilNewlineOr0(List(tok), err) + + def eatErrorsUntilNewlineOr0( + toks: List[TokenKind], + err: Token => Unit, + ): Unit = + while (!eof && peek().kind != TokenKind.NEWLINE && !toks.contains(peek().kind)) { + val next = bump() + err(next) + } + } object Tokens { @@ -291,8 +321,14 @@ object Error { case class Parser( tokens: Tokens, - errors: List[Error], -) + var errors: List[Error], +) { + + def addError( + error: Error + ): Unit = errors ::= error + +} object Parser {