From 42f4926e8caa6bb003f7dcdddb48b2b87c3c11a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Koz=C5=82owski?= Date: Mon, 2 Oct 2023 02:22:05 +0200 Subject: [PATCH 01/20] Begin work on a new scanner --- .../smithyql/parser/v2/scanner.scala | 166 +++++++++++++++++ .../smithyql/parser/v2/ScannerTests.scala | 171 ++++++++++++++++++ 2 files changed, 337 insertions(+) create mode 100644 modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala create mode 100644 modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala diff --git a/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala b/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala new file mode 100644 index 00000000..1cf4799d --- /dev/null +++ b/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala @@ -0,0 +1,166 @@ +package playground.smithyql.parser.v2.scanner + +import cats.kernel.Eq +import cats.syntax.all.* + +case class Token( + kind: TokenKind, + text: String, +) { + def width: Int = text.length +} + +object Token { + implicit val eq: Eq[Token] = Eq.fromUniversalEquals +} + +sealed trait TokenKind extends Product with Serializable { + + def apply( + text: String + ): Token = Token(this, text) + +} + +object TokenKind { + case object KW_IMPORT extends TokenKind + case object DOT extends TokenKind + case object COMMA extends TokenKind + case object HASH extends TokenKind + case object LB extends TokenKind + case object RB extends TokenKind + case object LBR extends TokenKind + case object RBR extends TokenKind + case object EQ extends TokenKind + case object SPACE extends TokenKind + case object NEWLINE extends TokenKind + case object IDENT extends TokenKind + case object COMMENT extends TokenKind + case object Error extends TokenKind + + implicit val eq: Eq[TokenKind] = Eq.fromUniversalEquals +} + +object Scanner { + + /** Entrypoint to scanning text into tokens. + * + * Always produces an output that can be rendered back to the original text. + */ + def scan( + s: String + ): List[Token] = { + var remaining = s + var tokens = List.empty[Token] + def add( + tok: Token + ) = tokens ::= tok + + def readSimple( + token: Char, + tok: TokenKind, + ): PartialFunction[Char, Unit] = { case `token` => + add(tok(token.toString)) + remaining = remaining.tail + } + + def simpleTokens( + pairings: ( + Char, + TokenKind, + )* + ): PartialFunction[Char, Unit] = pairings + .map(readSimple.tupled) + .reduce(_ orElse _) + + val readOne: PartialFunction[Char, Unit] = simpleTokens( + '.' -> TokenKind.DOT, + ',' -> TokenKind.COMMA, + '#' -> TokenKind.HASH, + '[' -> TokenKind.LB, + ']' -> TokenKind.RB, + '{' -> TokenKind.LBR, + '}' -> TokenKind.RBR, + '=' -> TokenKind.EQ, + ).orElse { + case letter if letter.isLetter => + val (letters, rest) = remaining.span(ch => ch.isLetterOrDigit || ch == '_') + add(TokenKind.IDENT(letters)) + remaining = rest + } + + // split "whitespace" string into chains of contiguous newlines OR whitespace characters. + def whitespaceChains( + whitespace: String + ): List[Token] = { + val isNewline = (ch: Char) => ch == '\n' + + if (whitespace.isEmpty) + Nil + else if (isNewline(whitespace.head)) { + val (nl, rest) = whitespace.span(isNewline) + TokenKind.NEWLINE(nl) :: whitespaceChains(rest) + } else { + val (wsp, rest) = whitespace.span(!isNewline(_)) + TokenKind.SPACE(wsp) :: whitespaceChains(rest) + } + } + + def eatWhitespace( + ) = { + val (wsp, rest) = remaining.span(ch => ch.isWhitespace) + if (wsp.isEmpty()) + false + else { + whitespaceChains(wsp).foreach(add) + remaining = rest + + true + } + } + + def eatComments( + ) = + if (!remaining.startsWith("//")) + false + else { + while (remaining.startsWith("//")) { + val (comment, rest) = remaining.span(_ != '\n') + add(TokenKind.COMMENT(comment)) + remaining = rest + } + + true + } + + def eatErrors( + ) = { + // todo: bug: even if the next character starts a multi-char token, this will consider it an error. + // instead, we should rework "readOne" to consume arbitrary constant-length tokens, and also include the possibility that `rest` has comments or whitespace. + val (failures, rest) = remaining.span(!readOne.isDefinedAt(_)) + remaining = rest + if (failures.nonEmpty) { + add(TokenKind.Error(failures)) + true + } else + false + } + + while (remaining.nonEmpty) { + val last = remaining + + readOne.applyOrElse[Char, Any]( + remaining.head, + (_: Char) => + // nothing matched. Eat whitespace and see if the rest is an error + eatWhitespace() || eatComments() || eatErrors(), + ) + + if (remaining == last) + sys.error(s"no progress in the last run! remaining string: $remaining") + } + + tokens.reverse + } + +} diff --git a/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala new file mode 100644 index 00000000..a2cb582f --- /dev/null +++ b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala @@ -0,0 +1,171 @@ +package playground.smithyql.parser.v2 + +import cats.effect.IO +import cats.implicits._ +import org.scalacheck.Arbitrary +import org.scalacheck.Gen +import playground.smithyql.parser.v2.scanner.Scanner +import playground.smithyql.parser.v2.scanner.Token +import playground.smithyql.parser.v2.scanner.TokenKind +import weaver._ +import weaver.scalacheck.Checkers + +import Scanner.scan + +object ScannerTests extends SimpleIOSuite with Checkers { + + def arbTests( + name: TestName + )( + withArb: Arbitrary[String] => IO[Expectations] + ): Unit = { + + val sampleStringGen = Gen.oneOf( + Gen.alphaStr, + Gen.alphaNumStr, + Gen.asciiPrintableStr, + Gen.identifier, + Gen.oneOf(List(' ', '\n', '\t', '\r', '\f', '\b')).map(_.toString), + ) + + val arbString: Arbitrary[String] = Arbitrary { + Gen.listOf(sampleStringGen).map(_.mkString) + } + + test(name)(withArb(Arbitrary.arbString)) + test(name.copy(name = name.name + " (prepared input)"))(withArb(arbString)) + } + + arbTests("Any string input scans successfully") { implicit arbString => + forall { (s: String) => + scan(s): Unit + success + } + } + + arbTests("Scanning is lossless") { implicit arbString => + forall { (s: String) => + assert.eql(scan(s).foldMap(_.text), s) + } + } + + private def scanTest( + input: String, + explicitName: String = "", + )( + expected: List[Token] + ): Unit = + pureTest( + if (explicitName.nonEmpty) + explicitName + else + "Scan string: " + sanitize(input) + ) { + assert.eql(expected, scan(input)) + } + + private def sanitize( + text: String + ) = text.replace(" ", "·").replace("\n", "↵") + + scanTest("{")(List(TokenKind.LBR("{"))) + scanTest("}")(List(TokenKind.RBR("}"))) + scanTest("[")(List(TokenKind.LB("["))) + scanTest("]")(List(TokenKind.RB("]"))) + scanTest(".")(List(TokenKind.DOT("."))) + scanTest(",")(List(TokenKind.COMMA(","))) + scanTest("#")(List(TokenKind.HASH("#"))) + scanTest("=")(List(TokenKind.EQ("="))) + scanTest("a")(List(TokenKind.IDENT("a"))) + + // idents + scanTest("abcdef")(List(TokenKind.IDENT("abcdef"))) + + scanTest( + "hello_world" + )( + List( + TokenKind.IDENT("hello_world") + ) + ) + + scanTest( + "helloworld123" + )( + List( + TokenKind.IDENT("helloworld123") + ) + ) + + // whitespace + scanTest(" ")(List(TokenKind.SPACE(" "))) + scanTest("\n")(List(TokenKind.NEWLINE("\n"))) + + // contiguous whitespace of all kinds + // notably newlines are grouped together separately from other whitespace + scanTest(" \r \r \n\n")(List(TokenKind.SPACE(" \r \r "), TokenKind.NEWLINE("\n\n"))) + scanTest(" \n\n \n ")( + List( + TokenKind.SPACE(" "), + TokenKind.NEWLINE("\n\n"), + TokenKind.SPACE(" "), + TokenKind.NEWLINE("\n"), + TokenKind.SPACE(" "), + ) + ) + + // comments + scanTest("// hello 123 foo bar --")(List(TokenKind.COMMENT("// hello 123 foo bar --"))) + + scanTest( + explicitName = "Scan multiple line-comments", + input = + """//hello + |//world""".stripMargin, + )( + List( + TokenKind.COMMENT("//hello"), + TokenKind.NEWLINE("\n"), + TokenKind.COMMENT("//world"), + ) + ) + + scanTest( + "hello world //this is a comment" + )( + List( + TokenKind.IDENT("hello"), + TokenKind.SPACE(" "), + TokenKind.IDENT("world"), + TokenKind.SPACE(" "), + TokenKind.COMMENT("//this is a comment"), + ) + ) + + // errors + + scanTest( + explicitName = "Error tokens for input that doesn't match any other token", + input = "🤷*%$^@-+?", + )(List(TokenKind.Error("🤷*%$^@-+?"))) + + scanTest( + explicitName = "Error tokens mixed between other tokens", + input = "hello@world-this?is=an Date: Mon, 2 Oct 2023 02:27:29 +0200 Subject: [PATCH 02/20] Support colons --- .../src/main/scala/playground/smithyql/parser/v2/scanner.scala | 2 ++ .../test/scala/playground/smithyql/parser/v2/ScannerTests.scala | 1 + 2 files changed, 3 insertions(+) diff --git a/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala b/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala index 1cf4799d..85c69630 100644 --- a/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala +++ b/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala @@ -31,6 +31,7 @@ object TokenKind { case object RB extends TokenKind case object LBR extends TokenKind case object RBR extends TokenKind + case object COLON extends TokenKind case object EQ extends TokenKind case object SPACE extends TokenKind case object NEWLINE extends TokenKind @@ -81,6 +82,7 @@ object Scanner { ']' -> TokenKind.RB, '{' -> TokenKind.LBR, '}' -> TokenKind.RBR, + ':' -> TokenKind.COLON, '=' -> TokenKind.EQ, ).orElse { case letter if letter.isLetter => diff --git a/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala index a2cb582f..9885b45f 100644 --- a/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala +++ b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala @@ -75,6 +75,7 @@ object ScannerTests extends SimpleIOSuite with Checkers { scanTest(".")(List(TokenKind.DOT("."))) scanTest(",")(List(TokenKind.COMMA(","))) scanTest("#")(List(TokenKind.HASH("#"))) + scanTest(":")(List(TokenKind.COLON(":"))) scanTest("=")(List(TokenKind.EQ("="))) scanTest("a")(List(TokenKind.IDENT("a"))) From 84e42a36b999caeb0c1ecfaa2e5b1f00aaf4884c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Koz=C5=82owski?= Date: Mon, 2 Oct 2023 02:31:12 +0200 Subject: [PATCH 03/20] Add a more complex test case --- .../smithyql/parser/v2/ScannerTests.scala | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala index 9885b45f..63e99219 100644 --- a/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala +++ b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala @@ -169,4 +169,36 @@ object ScannerTests extends SimpleIOSuite with Checkers { ) ) + // complex cases + + scanTest( + explicitName = "many tokens of punctuation and idents mixed with error nodes and comments", + input = + """{foo}[bar].baz,xx#:=abc123def ghe--eef //hello + |""".stripMargin, + )( + List( + TokenKind.LBR("{"), + TokenKind.IDENT("foo"), + TokenKind.RBR("}"), + TokenKind.LB("["), + TokenKind.IDENT("bar"), + TokenKind.RB("]"), + TokenKind.DOT("."), + TokenKind.IDENT("baz"), + TokenKind.COMMA(","), + TokenKind.IDENT("xx"), + TokenKind.HASH("#"), + TokenKind.COLON(":"), + TokenKind.EQ("="), + TokenKind.IDENT("abc123def"), + TokenKind.SPACE(" "), + TokenKind.IDENT("ghe"), + TokenKind.Error("--"), + TokenKind.IDENT("eef"), + TokenKind.SPACE(" "), + TokenKind.COMMENT("//hello"), + TokenKind.NEWLINE("\n"), + ) + ) } From c00a59aa44d68057ec4601173e41e4911f6bb6c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Koz=C5=82owski?= Date: Mon, 2 Oct 2023 02:34:08 +0200 Subject: [PATCH 04/20] Add scanTestReverse --- .../smithyql/parser/v2/ScannerTests.scala | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala index 63e99219..36d2c4cf 100644 --- a/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala +++ b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala @@ -64,6 +64,16 @@ object ScannerTests extends SimpleIOSuite with Checkers { assert.eql(expected, scan(input)) } + // Runs scanTest by first rendering the expected tokens to a string, then scanning it to get them back. + // If the output is not the same as the input, the test fails. + // While it's guaranteed that rendering tokens to text produces scannable code (everything is scannable), + // due to ambiguities in the scanner it's not guaranteed that the output will be the same as the input - hence the need to test. + private def scanTestReverse( + explicitName: String + )( + expected: List[Token] + ): Unit = scanTest(expected.foldMap(_.text), explicitName)(expected) + private def sanitize( text: String ) = text.replace(" ", "·").replace("\n", "↵") @@ -171,11 +181,8 @@ object ScannerTests extends SimpleIOSuite with Checkers { // complex cases - scanTest( - explicitName = "many tokens of punctuation and idents mixed with error nodes and comments", - input = - """{foo}[bar].baz,xx#:=abc123def ghe--eef //hello - |""".stripMargin, + scanTestReverse( + "many tokens of punctuation and idents mixed with error nodes and comments" )( List( TokenKind.LBR("{"), @@ -188,6 +195,7 @@ object ScannerTests extends SimpleIOSuite with Checkers { TokenKind.IDENT("baz"), TokenKind.COMMA(","), TokenKind.IDENT("xx"), + TokenKind.NEWLINE("\n"), TokenKind.HASH("#"), TokenKind.COLON(":"), TokenKind.EQ("="), @@ -197,6 +205,7 @@ object ScannerTests extends SimpleIOSuite with Checkers { TokenKind.Error("--"), TokenKind.IDENT("eef"), TokenKind.SPACE(" "), + TokenKind.NEWLINE("\n"), TokenKind.COMMENT("//hello"), TokenKind.NEWLINE("\n"), ) From a6782e02d1a27d5d701061f9c5f671db011adc7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Koz=C5=82owski?= Date: Mon, 2 Oct 2023 02:34:40 +0200 Subject: [PATCH 05/20] Comment out currently unsupported token --- .../src/main/scala/playground/smithyql/parser/v2/scanner.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala b/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala index 85c69630..c19931dc 100644 --- a/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala +++ b/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala @@ -23,7 +23,7 @@ sealed trait TokenKind extends Product with Serializable { } object TokenKind { - case object KW_IMPORT extends TokenKind + // case object KW_IMPORT extends TokenKind case object DOT extends TokenKind case object COMMA extends TokenKind case object HASH extends TokenKind From c69a352e51c3e6e3f5815762f476d63c989d69da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Koz=C5=82owski?= Date: Mon, 2 Oct 2023 02:46:30 +0200 Subject: [PATCH 06/20] Add keyword tokens --- .../scala/playground/smithyql/parser/v2/scanner.scala | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala b/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala index c19931dc..58cfac48 100644 --- a/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala +++ b/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala @@ -23,7 +23,13 @@ sealed trait TokenKind extends Product with Serializable { } object TokenKind { - // case object KW_IMPORT extends TokenKind + case object KW_USE extends TokenKind + case object KW_SERVICE extends TokenKind + case object KW_BOOLEAN extends TokenKind + case object KW_NUMBER extends TokenKind + case object KW_STRING extends TokenKind + case object KW_NULL extends TokenKind + case object DOT extends TokenKind case object COMMA extends TokenKind case object HASH extends TokenKind From d117fc20cbca201b0e0084dce07261406e8e0842 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Koz=C5=82owski?= Date: Mon, 2 Oct 2023 03:15:30 +0200 Subject: [PATCH 07/20] Rework error matching --- .../test/scala/playground/Assertions.scala | 2 +- .../smithyql/parser/v2/scanner.scala | 45 ++++++++++++------- .../smithyql/parser/v2/ScannerTests.scala | 26 +++++++++-- 3 files changed, 53 insertions(+), 20 deletions(-) diff --git a/modules/ast/src/test/scala/playground/Assertions.scala b/modules/ast/src/test/scala/playground/Assertions.scala index 9dd6bed4..23207841 100644 --- a/modules/ast/src/test/scala/playground/Assertions.scala +++ b/modules/ast/src/test/scala/playground/Assertions.scala @@ -23,7 +23,7 @@ object Assertions extends Expectations.Helpers { val stringWithResets = d.show()(conf).linesWithSeparators.map(Console.RESET + _).mkString failure( - s"Diff failed:\n${Console.RESET}(${conf.right("expected")}, ${conf.left("actual")})\n\n" + stringWithResets + s"Diff failed:\n${Console.RESET}(${conf.left("expected")}, ${conf.right("actual")})\n\n" + stringWithResets ) } diff --git a/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala b/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala index 58cfac48..6fea2b15 100644 --- a/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala +++ b/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala @@ -66,9 +66,10 @@ object Scanner { def readSimple( token: Char, tok: TokenKind, - ): PartialFunction[Char, Unit] = { case `token` => - add(tok(token.toString)) - remaining = remaining.tail + ): PartialFunction[Unit, Unit] = { + case _ if remaining.startsWith(token.toString()) => + add(tok(token.toString)) + remaining = remaining.drop(token.toString().length()) } def simpleTokens( @@ -76,11 +77,9 @@ object Scanner { Char, TokenKind, )* - ): PartialFunction[Char, Unit] = pairings - .map(readSimple.tupled) - .reduce(_ orElse _) + ): PartialFunction[Unit, Unit] = pairings.map(readSimple.tupled).reduce(_.orElse(_)) - val readOne: PartialFunction[Char, Unit] = simpleTokens( + def readOne: PartialFunction[Unit, Unit] = simpleTokens( '.' -> TokenKind.DOT, ',' -> TokenKind.COMMA, '#' -> TokenKind.HASH, @@ -91,7 +90,7 @@ object Scanner { ':' -> TokenKind.COLON, '=' -> TokenKind.EQ, ).orElse { - case letter if letter.isLetter => + case _ if remaining.head.isLetter => val (letters, rest) = remaining.span(ch => ch.isLetterOrDigit || ch == '_') add(TokenKind.IDENT(letters)) remaining = rest @@ -145,8 +144,17 @@ object Scanner { ) = { // todo: bug: even if the next character starts a multi-char token, this will consider it an error. // instead, we should rework "readOne" to consume arbitrary constant-length tokens, and also include the possibility that `rest` has comments or whitespace. - val (failures, rest) = remaining.span(!readOne.isDefinedAt(_)) - remaining = rest + val (failures, _) = remaining.span { _ => + if (readOne.isDefinedAt(())) + // this will match. stop! + false + else { + // didn't match. We need to move the cursor manually here + remaining = remaining.tail + true + } + } + if (failures.nonEmpty) { add(TokenKind.Error(failures)) true @@ -157,13 +165,18 @@ object Scanner { while (remaining.nonEmpty) { val last = remaining - readOne.applyOrElse[Char, Any]( - remaining.head, - (_: Char) => - // nothing matched. Eat whitespace and see if the rest is an error - eatWhitespace() || eatComments() || eatErrors(), - ) + { + val matched = readOne.isDefinedAt(()) + if (matched) + readOne(()) + + matched + } || + eatWhitespace() || + eatComments() || + eatErrors(): Unit + // last-effort sanity check if (remaining == last) sys.error(s"no progress in the last run! remaining string: $remaining") } diff --git a/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala index 36d2c4cf..f15b793d 100644 --- a/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala +++ b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala @@ -2,8 +2,10 @@ package playground.smithyql.parser.v2 import cats.effect.IO import cats.implicits._ +import com.softwaremill.diffx.Diff import org.scalacheck.Arbitrary import org.scalacheck.Gen +import playground.Assertions import playground.smithyql.parser.v2.scanner.Scanner import playground.smithyql.parser.v2.scanner.Token import playground.smithyql.parser.v2.scanner.TokenKind @@ -14,6 +16,9 @@ import Scanner.scan object ScannerTests extends SimpleIOSuite with Checkers { + implicit val tokenKindDiff: Diff[TokenKind] = Diff.derived + implicit val tokenDiff: Diff[Token] = Diff.derived + def arbTests( name: TestName )( @@ -51,17 +56,19 @@ object ScannerTests extends SimpleIOSuite with Checkers { private def scanTest( input: String, - explicitName: String = "", + explicitName: TestName = "", )( expected: List[Token] + )( + implicit loc: SourceLocation ): Unit = pureTest( - if (explicitName.nonEmpty) + if (explicitName.name.nonEmpty) explicitName else "Scan string: " + sanitize(input) ) { - assert.eql(expected, scan(input)) + Assertions.assertNoDiff(scan(input), expected) } // Runs scanTest by first rendering the expected tokens to a string, then scanning it to get them back. @@ -72,6 +79,8 @@ object ScannerTests extends SimpleIOSuite with Checkers { explicitName: String )( expected: List[Token] + )( + implicit loc: SourceLocation ): Unit = scanTest(expected.foldMap(_.text), explicitName)(expected) private def sanitize( @@ -162,6 +171,17 @@ object ScannerTests extends SimpleIOSuite with Checkers { scanTest( explicitName = "Error tokens mixed between other tokens", + input = "hello@world", + )( + List( + TokenKind.IDENT("hello"), + TokenKind.Error("@"), + TokenKind.IDENT("world"), + ) + ) + + scanTest( + explicitName = "Error tokens mixed between other tokens - complex", input = "hello@world-this?is=an Date: Mon, 2 Oct 2023 03:26:12 +0200 Subject: [PATCH 08/20] Add support for multi-char keywords --- .../smithyql/parser/v2/scanner.scala | 53 +++++++++++++------ .../smithyql/parser/v2/ScannerTests.scala | 25 +++++++++ 2 files changed, 62 insertions(+), 16 deletions(-) diff --git a/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala b/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala index 6fea2b15..e3a10929 100644 --- a/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala +++ b/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala @@ -64,38 +64,59 @@ object Scanner { ) = tokens ::= tok def readSimple( - token: Char, + token: String, tok: TokenKind, ): PartialFunction[Unit, Unit] = { - case _ if remaining.startsWith(token.toString()) => + case _ if remaining.startsWith(token) => add(tok(token.toString)) - remaining = remaining.drop(token.toString().length()) + remaining = remaining.drop(token.length()) } def simpleTokens( pairings: ( - Char, + String, TokenKind, )* ): PartialFunction[Unit, Unit] = pairings.map(readSimple.tupled).reduce(_.orElse(_)) - def readOne: PartialFunction[Unit, Unit] = simpleTokens( - '.' -> TokenKind.DOT, - ',' -> TokenKind.COMMA, - '#' -> TokenKind.HASH, - '[' -> TokenKind.LB, - ']' -> TokenKind.RB, - '{' -> TokenKind.LBR, - '}' -> TokenKind.RBR, - ':' -> TokenKind.COLON, - '=' -> TokenKind.EQ, - ).orElse { + val keywords = Map( + "use" -> TokenKind.KW_USE, + "service" -> TokenKind.KW_SERVICE, + "null" -> TokenKind.KW_NULL, + "true" -> TokenKind.KW_BOOLEAN, + "false" -> TokenKind.KW_BOOLEAN, + ) + + def readIdent: PartialFunction[Unit, Unit] = { case _ if remaining.head.isLetter => val (letters, rest) = remaining.span(ch => ch.isLetterOrDigit || ch == '_') - add(TokenKind.IDENT(letters)) + + keywords.get(letters) match { + case Some(kind) => + // we matched a keyword, return it. + add(kind(letters)) + case None => + // normal ident + add(TokenKind.IDENT(letters)) + } + remaining = rest } + def readPunctuation: PartialFunction[Unit, Unit] = simpleTokens( + "." -> TokenKind.DOT, + "," -> TokenKind.COMMA, + "#" -> TokenKind.HASH, + "[" -> TokenKind.LB, + "]" -> TokenKind.RB, + "{" -> TokenKind.LBR, + "}" -> TokenKind.RBR, + ":" -> TokenKind.COLON, + "=" -> TokenKind.EQ, + ) + + def readOne: PartialFunction[Unit, Unit] = readIdent.orElse(readPunctuation) + // split "whitespace" string into chains of contiguous newlines OR whitespace characters. def whitespaceChains( whitespace: String diff --git a/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala index f15b793d..0eaa7271 100644 --- a/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala +++ b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala @@ -97,6 +97,12 @@ object ScannerTests extends SimpleIOSuite with Checkers { scanTest(":")(List(TokenKind.COLON(":"))) scanTest("=")(List(TokenKind.EQ("="))) scanTest("a")(List(TokenKind.IDENT("a"))) + scanTest("use")(List(TokenKind.KW_USE("use"))) + scanTest("service")(List(TokenKind.KW_SERVICE("service"))) + scanTest("null")(List(TokenKind.KW_NULL("null"))) + scanTest("true")(List(TokenKind.KW_BOOLEAN("true"))) + scanTest("false")(List(TokenKind.KW_BOOLEAN("false"))) + // todo: number, string // idents scanTest("abcdef")(List(TokenKind.IDENT("abcdef"))) @@ -117,6 +123,18 @@ object ScannerTests extends SimpleIOSuite with Checkers { ) ) + scanTest(explicitName = "Identifier similar to a keyword - prefix", input = "notfalse")( + List( + TokenKind.IDENT("notfalse") + ) + ) + + scanTest(explicitName = "Identifier similar to a keyword - suffix", input = "falsely")( + List( + TokenKind.IDENT("falsely") + ) + ) + // whitespace scanTest(" ")(List(TokenKind.SPACE(" "))) scanTest("\n")(List(TokenKind.NEWLINE("\n"))) @@ -199,6 +217,13 @@ object ScannerTests extends SimpleIOSuite with Checkers { ) ) + scanTest(explicitName = "Error tokens before a multi-char keyword", input = "--false")( + List( + TokenKind.Error("--"), + TokenKind.KW_BOOLEAN("false"), + ) + ) + // complex cases scanTestReverse( From 6dd09d7dd45947d04ad10386efeb320bb34656ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Koz=C5=82owski?= Date: Mon, 2 Oct 2023 03:28:20 +0200 Subject: [PATCH 09/20] Add more complex cases --- .../smithyql/parser/v2/ScannerTests.scala | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala index 0eaa7271..2d54f6a4 100644 --- a/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala +++ b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala @@ -255,4 +255,36 @@ object ScannerTests extends SimpleIOSuite with Checkers { TokenKind.NEWLINE("\n"), ) ) + + scanTest( + explicitName = "whitespace and comments around keyword", + input = + """hello use service foo //bar + | true //one + |//two + |null """.stripMargin, + )( + List( + TokenKind.IDENT("hello"), + TokenKind.SPACE(" "), + TokenKind.KW_USE("use"), + TokenKind.SPACE(" "), + TokenKind.KW_SERVICE("service"), + TokenKind.SPACE(" "), + TokenKind.IDENT("foo"), + TokenKind.SPACE(" "), + TokenKind.COMMENT("//bar"), + TokenKind.NEWLINE("\n"), + TokenKind.SPACE(" "), + TokenKind.KW_BOOLEAN("true"), + TokenKind.SPACE(" "), + TokenKind.COMMENT("//one"), + TokenKind.NEWLINE("\n"), + TokenKind.COMMENT("//two"), + TokenKind.NEWLINE("\n"), + TokenKind.KW_NULL("null"), + TokenKind.SPACE(" "), + ) + ) + } From 3afcf44495c10168c8846fc19197d8f733b578ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Koz=C5=82owski?= Date: Mon, 2 Oct 2023 03:29:56 +0200 Subject: [PATCH 10/20] cleanup --- .../main/scala/playground/smithyql/parser/v2/scanner.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala b/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala index e3a10929..9e1bb24d 100644 --- a/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala +++ b/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala @@ -87,7 +87,7 @@ object Scanner { "false" -> TokenKind.KW_BOOLEAN, ) - def readIdent: PartialFunction[Unit, Unit] = { + val readIdent: PartialFunction[Unit, Unit] = { case _ if remaining.head.isLetter => val (letters, rest) = remaining.span(ch => ch.isLetterOrDigit || ch == '_') @@ -95,6 +95,7 @@ object Scanner { case Some(kind) => // we matched a keyword, return it. add(kind(letters)) + case None => // normal ident add(TokenKind.IDENT(letters)) @@ -103,7 +104,7 @@ object Scanner { remaining = rest } - def readPunctuation: PartialFunction[Unit, Unit] = simpleTokens( + val readPunctuation: PartialFunction[Unit, Unit] = simpleTokens( "." -> TokenKind.DOT, "," -> TokenKind.COMMA, "#" -> TokenKind.HASH, @@ -115,7 +116,7 @@ object Scanner { "=" -> TokenKind.EQ, ) - def readOne: PartialFunction[Unit, Unit] = readIdent.orElse(readPunctuation) + val readOne: PartialFunction[Unit, Unit] = readIdent.orElse(readPunctuation) // split "whitespace" string into chains of contiguous newlines OR whitespace characters. def whitespaceChains( From fb0ee2e06bef7c64b03a6bad1a7df2e298f47013 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Koz=C5=82owski?= Date: Mon, 2 Oct 2023 03:40:37 +0200 Subject: [PATCH 11/20] Support string literals --- .../smithyql/parser/v2/scanner.scala | 20 ++++++- .../smithyql/parser/v2/ScannerTests.scala | 55 +++++++++++++++++++ 2 files changed, 72 insertions(+), 3 deletions(-) diff --git a/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala b/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala index 9e1bb24d..6e396497 100644 --- a/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala +++ b/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala @@ -26,8 +26,8 @@ object TokenKind { case object KW_USE extends TokenKind case object KW_SERVICE extends TokenKind case object KW_BOOLEAN extends TokenKind - case object KW_NUMBER extends TokenKind - case object KW_STRING extends TokenKind + case object LIT_NUMBER extends TokenKind + case object LIT_STRING extends TokenKind case object KW_NULL extends TokenKind case object DOT extends TokenKind @@ -116,7 +116,21 @@ object Scanner { "=" -> TokenKind.EQ, ) - val readOne: PartialFunction[Unit, Unit] = readIdent.orElse(readPunctuation) + val readStringLiteral: PartialFunction[Unit, Unit] = { + case _ if remaining.startsWith("\"") => + val (str, rest) = remaining.tail.span(_ != '\"') + if (rest.isEmpty) { // hit EOF + add(TokenKind.LIT_STRING("\"" + str)) + remaining = rest + } else { + add(TokenKind.LIT_STRING("\"" + str + "\"")) + remaining = rest.tail + } + } + + val readOne: PartialFunction[Unit, Unit] = readIdent + .orElse(readPunctuation) + .orElse(readStringLiteral) // split "whitespace" string into chains of contiguous newlines OR whitespace characters. def whitespaceChains( diff --git a/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala index 2d54f6a4..ed8c9ee4 100644 --- a/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala +++ b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala @@ -287,4 +287,59 @@ object ScannerTests extends SimpleIOSuite with Checkers { ) ) + // string literals + scanTest( + "\"hello world\"" + )( + List( + TokenKind.LIT_STRING("\"hello world\"") + ) + ) + + scanTest( + explicitName = "String literal that never ends", + input = "\"hello world", + )( + List( + TokenKind.LIT_STRING("\"hello world") + ) + ) + + scanTest( + explicitName = "Multiple string literals", + input = "\"hello world\", \"foo bar\"", + )( + List( + TokenKind.LIT_STRING("\"hello world\""), + TokenKind.COMMA(","), + TokenKind.SPACE(" "), + TokenKind.LIT_STRING("\"foo bar\""), + ) + ) + + scanTest( + explicitName = "Multiple string literals, second one not closed", + input = "\"hello world\", \"foo bar", + )( + List( + TokenKind.LIT_STRING("\"hello world\""), + TokenKind.COMMA(","), + TokenKind.SPACE(" "), + TokenKind.LIT_STRING("\"foo bar"), + ) + ) + + scanTest( + explicitName = "Multiple string literals, first one not closed", + input = "\"hello world, \"foo bar\"", + )( + List( + TokenKind.LIT_STRING("\"hello world, \""), + TokenKind.IDENT("foo"), + TokenKind.SPACE(" "), + TokenKind.IDENT("bar"), + TokenKind.LIT_STRING("\""), + ) + ) + } From b7ac39e25852b7055c52012cf59a6ad501e125a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Koz=C5=82owski?= Date: Mon, 2 Oct 2023 03:41:34 +0200 Subject: [PATCH 12/20] Add test for multiline string --- .../playground/smithyql/parser/v2/ScannerTests.scala | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala index ed8c9ee4..e8dd8d96 100644 --- a/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala +++ b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala @@ -342,4 +342,12 @@ object ScannerTests extends SimpleIOSuite with Checkers { ) ) + scanTest( + explicitName = "String literal, multi-line (parity test)", + input = "\"hello\nworld\"", + )( + List( + TokenKind.LIT_STRING("\"hello\nworld\"") + ) + ) } From de8e3ccc585142b027c26cdfcde3e8913521ae3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Koz=C5=82owski?= Date: Mon, 2 Oct 2023 03:46:30 +0200 Subject: [PATCH 13/20] Add test against real input --- .../smithyql/parser/v2/ScannerTests.scala | 120 ++++++++++++++++++ 1 file changed, 120 insertions(+) diff --git a/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala index e8dd8d96..b4ba858d 100644 --- a/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala +++ b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala @@ -9,6 +9,7 @@ import playground.Assertions import playground.smithyql.parser.v2.scanner.Scanner import playground.smithyql.parser.v2.scanner.Token import playground.smithyql.parser.v2.scanner.TokenKind +import playground.smithyql.parser.v2.scanner.TokenKind._ import weaver._ import weaver.scalacheck.Checkers @@ -350,4 +351,123 @@ object ScannerTests extends SimpleIOSuite with Checkers { TokenKind.LIT_STRING("\"hello\nworld\"") ) ) + + // real files + + scanTest( + explicitName = "Real file 1", + input = + """use service demo.smithy#DemoService + | + |// CreateSubscription { + |// subscription: { + |// id: "subscription_id", + |// name: "name", + |// createdAt: "2020-04-01T00:00:00Z", + |// }, + |// } + |CreateHero { + | hero: { + | good: // bgasdfasldf + | { + | howGood: 10, + | }, + | }, + | intSet: [ + | 1, + | 2, + | 1, + | ], + |} + |""".stripMargin, + )( + List( + TokenKind.KW_USE("use"), + TokenKind.SPACE(" "), + TokenKind.KW_SERVICE("service"), + TokenKind.SPACE(" "), + TokenKind.IDENT("demo"), + TokenKind.DOT("."), + TokenKind.IDENT("smithy"), + TokenKind.HASH("#"), + TokenKind.IDENT("DemoService"), + TokenKind.NEWLINE("\n\n"), + TokenKind.COMMENT("// CreateSubscription {"), + TokenKind.NEWLINE("\n"), + TokenKind.COMMENT("// subscription: {"), + TokenKind.NEWLINE("\n"), + TokenKind.COMMENT("// id: \"subscription_id\","), + TokenKind.NEWLINE("\n"), + TokenKind.COMMENT("// name: \"name\","), + TokenKind.NEWLINE("\n"), + TokenKind.COMMENT("// createdAt: \"2020-04-01T00:00:00Z\","), + TokenKind.NEWLINE("\n"), + TokenKind.COMMENT("// },"), + TokenKind.NEWLINE("\n"), + TokenKind.COMMENT("// }"), + TokenKind.NEWLINE("\n"), + TokenKind.IDENT("CreateHero"), + TokenKind.SPACE(" "), + TokenKind.LBR("{"), + TokenKind.NEWLINE("\n"), + TokenKind.SPACE(" "), + TokenKind.IDENT("hero"), + TokenKind.COLON(":"), + TokenKind.SPACE(" "), + TokenKind.LBR("{"), + TokenKind.NEWLINE("\n"), + TokenKind.SPACE(" "), + TokenKind.IDENT("good"), + TokenKind.COLON(":"), + TokenKind.SPACE(" "), + TokenKind.COMMENT("// bgasdfasldf"), + TokenKind.NEWLINE("\n"), + TokenKind.SPACE(" "), + TokenKind.LBR("{"), + TokenKind.NEWLINE("\n"), + TokenKind.SPACE(" "), + TokenKind.IDENT("howGood"), + TokenKind.COLON(":"), + TokenKind.SPACE(" "), + // bug: should be number + TokenKind.Error("10"), + TokenKind.COMMA(","), + TokenKind.NEWLINE("\n"), + TokenKind.SPACE(" "), + TokenKind.RBR("}"), + TokenKind.COMMA(","), + TokenKind.NEWLINE("\n"), + TokenKind.SPACE(" "), + TokenKind.RBR("}"), + TokenKind.COMMA(","), + TokenKind.NEWLINE("\n"), + TokenKind.SPACE(" "), + TokenKind.IDENT("intSet"), + TokenKind.COLON(":"), + TokenKind.SPACE(" "), + TokenKind.LB("["), + TokenKind.NEWLINE("\n"), + TokenKind.SPACE(" "), + // bug: should be a number + TokenKind.Error("1"), + TokenKind.COMMA(","), + TokenKind.NEWLINE("\n"), + TokenKind.SPACE(" "), + // bug: should be a number + TokenKind.Error("2"), + TokenKind.COMMA(","), + TokenKind.NEWLINE("\n"), + TokenKind.SPACE(" "), + // bug: should be a number + TokenKind.Error("1"), + TokenKind.COMMA(","), + TokenKind.NEWLINE("\n"), + TokenKind.SPACE(" "), + TokenKind.RB("]"), + TokenKind.COMMA(","), + TokenKind.NEWLINE("\n"), + TokenKind.RBR("}"), + TokenKind.NEWLINE("\n"), + ) + ) } From ac89ad3c1c6d83cbc8a4a15632278fd1271492c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Koz=C5=82owski?= Date: Mon, 2 Oct 2023 03:47:09 +0200 Subject: [PATCH 14/20] Import all syntax for less verbosity --- .../smithyql/parser/v2/ScannerTests.scala | 388 +++++++++--------- 1 file changed, 194 insertions(+), 194 deletions(-) diff --git a/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala index b4ba858d..8fe51c00 100644 --- a/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala +++ b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala @@ -88,31 +88,31 @@ object ScannerTests extends SimpleIOSuite with Checkers { text: String ) = text.replace(" ", "·").replace("\n", "↵") - scanTest("{")(List(TokenKind.LBR("{"))) - scanTest("}")(List(TokenKind.RBR("}"))) - scanTest("[")(List(TokenKind.LB("["))) - scanTest("]")(List(TokenKind.RB("]"))) - scanTest(".")(List(TokenKind.DOT("."))) - scanTest(",")(List(TokenKind.COMMA(","))) - scanTest("#")(List(TokenKind.HASH("#"))) - scanTest(":")(List(TokenKind.COLON(":"))) - scanTest("=")(List(TokenKind.EQ("="))) - scanTest("a")(List(TokenKind.IDENT("a"))) - scanTest("use")(List(TokenKind.KW_USE("use"))) - scanTest("service")(List(TokenKind.KW_SERVICE("service"))) - scanTest("null")(List(TokenKind.KW_NULL("null"))) - scanTest("true")(List(TokenKind.KW_BOOLEAN("true"))) - scanTest("false")(List(TokenKind.KW_BOOLEAN("false"))) + scanTest("{")(List(LBR("{"))) + scanTest("}")(List(RBR("}"))) + scanTest("[")(List(LB("["))) + scanTest("]")(List(RB("]"))) + scanTest(".")(List(DOT("."))) + scanTest(",")(List(COMMA(","))) + scanTest("#")(List(HASH("#"))) + scanTest(":")(List(COLON(":"))) + scanTest("=")(List(EQ("="))) + scanTest("a")(List(IDENT("a"))) + scanTest("use")(List(KW_USE("use"))) + scanTest("service")(List(KW_SERVICE("service"))) + scanTest("null")(List(KW_NULL("null"))) + scanTest("true")(List(KW_BOOLEAN("true"))) + scanTest("false")(List(KW_BOOLEAN("false"))) // todo: number, string // idents - scanTest("abcdef")(List(TokenKind.IDENT("abcdef"))) + scanTest("abcdef")(List(IDENT("abcdef"))) scanTest( "hello_world" )( List( - TokenKind.IDENT("hello_world") + IDENT("hello_world") ) ) @@ -120,41 +120,41 @@ object ScannerTests extends SimpleIOSuite with Checkers { "helloworld123" )( List( - TokenKind.IDENT("helloworld123") + IDENT("helloworld123") ) ) scanTest(explicitName = "Identifier similar to a keyword - prefix", input = "notfalse")( List( - TokenKind.IDENT("notfalse") + IDENT("notfalse") ) ) scanTest(explicitName = "Identifier similar to a keyword - suffix", input = "falsely")( List( - TokenKind.IDENT("falsely") + IDENT("falsely") ) ) // whitespace - scanTest(" ")(List(TokenKind.SPACE(" "))) - scanTest("\n")(List(TokenKind.NEWLINE("\n"))) + scanTest(" ")(List(SPACE(" "))) + scanTest("\n")(List(NEWLINE("\n"))) // contiguous whitespace of all kinds // notably newlines are grouped together separately from other whitespace - scanTest(" \r \r \n\n")(List(TokenKind.SPACE(" \r \r "), TokenKind.NEWLINE("\n\n"))) + scanTest(" \r \r \n\n")(List(SPACE(" \r \r "), NEWLINE("\n\n"))) scanTest(" \n\n \n ")( List( - TokenKind.SPACE(" "), - TokenKind.NEWLINE("\n\n"), - TokenKind.SPACE(" "), - TokenKind.NEWLINE("\n"), - TokenKind.SPACE(" "), + SPACE(" "), + NEWLINE("\n\n"), + SPACE(" "), + NEWLINE("\n"), + SPACE(" "), ) ) // comments - scanTest("// hello 123 foo bar --")(List(TokenKind.COMMENT("// hello 123 foo bar --"))) + scanTest("// hello 123 foo bar --")(List(COMMENT("// hello 123 foo bar --"))) scanTest( explicitName = "Scan multiple line-comments", @@ -163,9 +163,9 @@ object ScannerTests extends SimpleIOSuite with Checkers { |//world""".stripMargin, )( List( - TokenKind.COMMENT("//hello"), - TokenKind.NEWLINE("\n"), - TokenKind.COMMENT("//world"), + COMMENT("//hello"), + NEWLINE("\n"), + COMMENT("//world"), ) ) @@ -173,11 +173,11 @@ object ScannerTests extends SimpleIOSuite with Checkers { "hello world //this is a comment" )( List( - TokenKind.IDENT("hello"), - TokenKind.SPACE(" "), - TokenKind.IDENT("world"), - TokenKind.SPACE(" "), - TokenKind.COMMENT("//this is a comment"), + IDENT("hello"), + SPACE(" "), + IDENT("world"), + SPACE(" "), + COMMENT("//this is a comment"), ) ) @@ -186,16 +186,16 @@ object ScannerTests extends SimpleIOSuite with Checkers { scanTest( explicitName = "Error tokens for input that doesn't match any other token", input = "🤷*%$^@-+?", - )(List(TokenKind.Error("🤷*%$^@-+?"))) + )(List(Error("🤷*%$^@-+?"))) scanTest( explicitName = "Error tokens mixed between other tokens", input = "hello@world", )( List( - TokenKind.IDENT("hello"), - TokenKind.Error("@"), - TokenKind.IDENT("world"), + IDENT("hello"), + Error("@"), + IDENT("world"), ) ) @@ -204,24 +204,24 @@ object ScannerTests extends SimpleIOSuite with Checkers { input = "hello@world-this?is=an Date: Tue, 3 Oct 2023 03:28:59 +0200 Subject: [PATCH 15/20] No need for kinds --- .../main/scala/playground/plugins/PlaygroundPlugin.scala | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/modules/plugin-core/src/main/scala/playground/plugins/PlaygroundPlugin.scala b/modules/plugin-core/src/main/scala/playground/plugins/PlaygroundPlugin.scala index 03541224..b192fd6d 100644 --- a/modules/plugin-core/src/main/scala/playground/plugins/PlaygroundPlugin.scala +++ b/modules/plugin-core/src/main/scala/playground/plugins/PlaygroundPlugin.scala @@ -5,7 +5,6 @@ import org.http4s.client.Client import smithy4s.Service import smithy4s.UnsupportedProtocolError import smithy4s.http4s.SimpleProtocolBuilder -import smithy4s.kinds._ import java.util.ServiceLoader import scala.jdk.CollectionConverters._ @@ -36,7 +35,7 @@ trait SimpleHttpBuilder { def client[Alg[_[_, _, _, _, _]], F[_]: Concurrent]( service: Service[Alg], backend: Client[F], - ): Either[UnsupportedProtocolError, FunctorAlgebra[Alg, F]] + ): Either[UnsupportedProtocolError, service.Impl[F]] } @@ -50,8 +49,7 @@ object SimpleHttpBuilder { def client[Alg[_[_, _, _, _, _]], F[_]: Concurrent]( service: Service[Alg], backend: Client[F], - ): Either[UnsupportedProtocolError, FunctorAlgebra[Alg, F]] = - builder(service).client(backend).use + ): Either[UnsupportedProtocolError, service.Impl[F]] = builder(service).client(backend).make } From 9526d1a7a9e7fb21ef6fe604d4deb04e60729014 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Koz=C5=82owski?= Date: Tue, 3 Oct 2023 03:48:00 +0200 Subject: [PATCH 16/20] Support number literals --- .../smithyql/parser/v2/scanner.scala | 28 +++++++++ .../smithyql/parser/v2/ScannerTests.scala | 60 ++++++++++++++++--- 2 files changed, 79 insertions(+), 9 deletions(-) diff --git a/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala b/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala index 6e396497..52c47e05 100644 --- a/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala +++ b/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala @@ -1,8 +1,11 @@ package playground.smithyql.parser.v2.scanner import cats.kernel.Eq +import cats.parse.Numbers import cats.syntax.all.* +import scala.annotation.nowarn + case class Token( kind: TokenKind, text: String, @@ -128,9 +131,34 @@ object Scanner { } } + val readNumberLiteral: PartialFunction[Unit, Unit] = { + // I love this language + object jsonNumber { + def unapply( + @nowarn("cat=unused") + unused: Unit + ): Option[ + ( + String, + String, + ) + ] = + // For now, we're using the cats-parse implementation simply because it's consistent with the current implementation + // and we can rewrite this later on when we drop support for the other parser + // and no longer need cats-parse. + Numbers.jsonNumber.parse(remaining).toOption + } + + { case jsonNumber(rest, num) => + add(TokenKind.LIT_NUMBER(num.toString)) + remaining = rest + } + } + val readOne: PartialFunction[Unit, Unit] = readIdent .orElse(readPunctuation) .orElse(readStringLiteral) + .orElse(readNumberLiteral) // split "whitespace" string into chains of contiguous newlines OR whitespace characters. def whitespaceChains( diff --git a/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala index 8fe51c00..6bbb3d94 100644 --- a/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala +++ b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala @@ -1,7 +1,9 @@ package playground.smithyql.parser.v2 +import cats.Show import cats.effect.IO import cats.implicits._ +import cats.parse.Numbers import com.softwaremill.diffx.Diff import org.scalacheck.Arbitrary import org.scalacheck.Gen @@ -103,7 +105,51 @@ object ScannerTests extends SimpleIOSuite with Checkers { scanTest("null")(List(KW_NULL("null"))) scanTest("true")(List(KW_BOOLEAN("true"))) scanTest("false")(List(KW_BOOLEAN("false"))) - // todo: number, string + + scanTest("5")(List(LIT_NUMBER("5"))) + scanTest("50")(List(LIT_NUMBER("50"))) + + // todo: this would be nice to parse as a single error token. + // might be possible to achieve by catching epsilon failures in the number parser, so that if any progress is seen we'd skip N characters before another token is attempted. + // need to test this for interactions with other following tokens (as well as error tokens before numbers, which are using readOne). + scanTest("05")(List(LIT_NUMBER("0"), LIT_NUMBER("5"))) + scanTest("0")(List(LIT_NUMBER("0"))) + scanTest("0.0")(List(LIT_NUMBER("0.0"))) + scanTest("0.5")(List(LIT_NUMBER("0.5"))) + // tbh: this might work better as a single error token. + // see above comment about epsilon failures. + scanTest("0.")(List(Error("0"), DOT("."))) + + scanTest("1e10")(List(LIT_NUMBER("1e10"))) + + private def numberTest[A: Arbitrary: Show]( + name: String + ) = + test(s"Any $name can be parsed as a number") { + forall { (a: A) => + Assertions.assertNoDiff(scan(a.toString()), List(LIT_NUMBER(a.toString()))) + } + } + + numberTest[Byte]("byte") + numberTest[Short]("short") + numberTest[Int]("int") + numberTest[Long]("long") + numberTest[Float]("float") + numberTest[Double]("double") + numberTest[BigInt]("bigint") + // deliberately not testing BigDecimal this way - these are wider than json numbers so we can't test the full range + + test("If cats-parse can parse a JSON number, so can we") { + forall { (s: String) => + Numbers.jsonNumber.parseAll(s).toOption match { + case None => success + case Some(succ) => + println("woop woop!") + Assertions.assertNoDiff(scan(succ), List(LIT_NUMBER(succ))) + } + } + } // idents scanTest("abcdef")(List(IDENT("abcdef"))) @@ -429,8 +475,7 @@ object ScannerTests extends SimpleIOSuite with Checkers { IDENT("howGood"), COLON(":"), SPACE(" "), - // bug: should be number - Error("10"), + LIT_NUMBER("10"), COMMA(","), NEWLINE("\n"), SPACE(" "), @@ -448,18 +493,15 @@ object ScannerTests extends SimpleIOSuite with Checkers { LB("["), NEWLINE("\n"), SPACE(" "), - // bug: should be a number - Error("1"), + LIT_NUMBER("1"), COMMA(","), NEWLINE("\n"), SPACE(" "), - // bug: should be a number - Error("2"), + LIT_NUMBER("2"), COMMA(","), NEWLINE("\n"), SPACE(" "), - // bug: should be a number - Error("1"), + LIT_NUMBER("1"), COMMA(","), NEWLINE("\n"), SPACE(" "), From b35bbf7b04719d48788b165dfb409fce59ba0f25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Koz=C5=82owski?= Date: Tue, 3 Oct 2023 04:00:33 +0200 Subject: [PATCH 17/20] Add parity test for scanner --- .../playground/smithyql/parser/ParserSuite.scala | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/modules/parser/src/test/scala/playground/smithyql/parser/ParserSuite.scala b/modules/parser/src/test/scala/playground/smithyql/parser/ParserSuite.scala index f0faa515..aa1aebf0 100644 --- a/modules/parser/src/test/scala/playground/smithyql/parser/ParserSuite.scala +++ b/modules/parser/src/test/scala/playground/smithyql/parser/ParserSuite.scala @@ -10,6 +10,8 @@ import io.circe.Decoder import io.circe.syntax._ import playground.Assertions._ import playground.smithyql._ +import playground.smithyql.parser.v2.scanner.Scanner +import playground.smithyql.parser.v2.scanner.TokenKind import weaver._ import java.nio.file @@ -52,6 +54,17 @@ trait ParserSuite extends SimpleIOSuite { } } } + + test(testCase.name + " (v2 scanner)") { + testCase.readInput(trimWhitespace).map { input => + val scanned = Scanner.scan(input) + + val errors = scanned.filter(_.kind == TokenKind.Error) + // non-empty inputs should parse to non-empty outputs + assert(input.isEmpty || scanned.nonEmpty) && + assert(errors.isEmpty) + } + } } def loadNegativeParserTests[Alg[_[_]]: SourceParser]( From 1b205d39677b4c1f794cd8b64649149beb0182b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Koz=C5=82owski?= Date: Tue, 3 Oct 2023 04:06:53 +0200 Subject: [PATCH 18/20] Also check negative cases --- .../playground/smithyql/parser/ParserSuite.scala | 14 +++++++++++++- .../negative/PreludeParserNegativeTests.scala | 2 +- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/modules/parser/src/test/scala/playground/smithyql/parser/ParserSuite.scala b/modules/parser/src/test/scala/playground/smithyql/parser/ParserSuite.scala index aa1aebf0..8e65bfc8 100644 --- a/modules/parser/src/test/scala/playground/smithyql/parser/ParserSuite.scala +++ b/modules/parser/src/test/scala/playground/smithyql/parser/ParserSuite.scala @@ -55,6 +55,13 @@ trait ParserSuite extends SimpleIOSuite { } } + validTokensTest(testCase, trimWhitespace) + } + + private def validTokensTest( + testCase: TestCase, + trimWhitespace: Boolean, + ) = test(testCase.name + " (v2 scanner)") { testCase.readInput(trimWhitespace).map { input => val scanned = Scanner.scan(input) @@ -65,11 +72,12 @@ trait ParserSuite extends SimpleIOSuite { assert(errors.isEmpty) } } - } + // invalidTokens: a flag that tells the suite whether the file should contain invalid tokens. def loadNegativeParserTests[Alg[_[_]]: SourceParser]( prefix: String, trimWhitespace: Boolean = false, + invalidTokens: Boolean, ): Unit = loadTestCases("", List("negative", prefix)).foreach { testCase => test(testCase.name) { testCase.readInput(trimWhitespace).map { input => @@ -79,6 +87,10 @@ trait ParserSuite extends SimpleIOSuite { } } } + + if (!invalidTokens) + validTokensTest(testCase, trimWhitespace) + } private def readText( diff --git a/modules/parser/src/test/scala/playground/smithyql/parser/generative/negative/PreludeParserNegativeTests.scala b/modules/parser/src/test/scala/playground/smithyql/parser/generative/negative/PreludeParserNegativeTests.scala index 55c2f618..b4b258f3 100644 --- a/modules/parser/src/test/scala/playground/smithyql/parser/generative/negative/PreludeParserNegativeTests.scala +++ b/modules/parser/src/test/scala/playground/smithyql/parser/generative/negative/PreludeParserNegativeTests.scala @@ -4,5 +4,5 @@ import playground.smithyql.Prelude import playground.smithyql.parser.ParserSuite object PreludeParserNegativeTests extends ParserSuite { - loadNegativeParserTests[Prelude]("prelude", trimWhitespace = true) + loadNegativeParserTests[Prelude]("prelude", trimWhitespace = true, invalidTokens = false) } From 54f7a00a4cf9575f049dc05edbb73191a811786e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Koz=C5=82owski?= Date: Tue, 3 Oct 2023 04:11:57 +0200 Subject: [PATCH 19/20] Tiny simplification + comment --- .../scala/playground/smithyql/parser/v2/scanner.scala | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala b/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala index 52c47e05..229953ae 100644 --- a/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala +++ b/modules/parser/src/main/scala/playground/smithyql/parser/v2/scanner.scala @@ -155,6 +155,8 @@ object Scanner { } } + // readOne and friends are all partial functions: this is the current implementation of lookahead. + // it's not great, but it kinda works. val readOne: PartialFunction[Unit, Unit] = readIdent .orElse(readPunctuation) .orElse(readStringLiteral) @@ -229,13 +231,7 @@ object Scanner { while (remaining.nonEmpty) { val last = remaining - { - val matched = readOne.isDefinedAt(()) - if (matched) - readOne(()) - - matched - } || + readOne.lift(()).isDefined || eatWhitespace() || eatComments() || eatErrors(): Unit From dcf9e76459cfcfbaf24737751e2c80f0fb82b0d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Koz=C5=82owski?= Date: Tue, 3 Oct 2023 04:15:12 +0200 Subject: [PATCH 20/20] Split scanner suites --- .../playground/smithyql/parser/v2/Diffs.scala | 12 ++ .../parser/v2/ScannerExampleTests.scala | 119 ++++++++++++ .../smithyql/parser/v2/ScannerSuite.scala | 72 +++++++ .../smithyql/parser/v2/ScannerTests.scala | 182 +----------------- 4 files changed, 205 insertions(+), 180 deletions(-) create mode 100644 modules/parser/src/test/scala/playground/smithyql/parser/v2/Diffs.scala create mode 100644 modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerExampleTests.scala create mode 100644 modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerSuite.scala diff --git a/modules/parser/src/test/scala/playground/smithyql/parser/v2/Diffs.scala b/modules/parser/src/test/scala/playground/smithyql/parser/v2/Diffs.scala new file mode 100644 index 00000000..1ddca3d8 --- /dev/null +++ b/modules/parser/src/test/scala/playground/smithyql/parser/v2/Diffs.scala @@ -0,0 +1,12 @@ +package playground.smithyql.parser.v2 + +import com.softwaremill.diffx.Diff +import playground.smithyql.parser.v2.scanner.Token +import playground.smithyql.parser.v2.scanner.TokenKind + +object Diffs { + + implicit val tokenKindDiff: Diff[TokenKind] = Diff.derived + implicit val tokenDiff: Diff[Token] = Diff.derived + +} diff --git a/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerExampleTests.scala b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerExampleTests.scala new file mode 100644 index 00000000..d6680264 --- /dev/null +++ b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerExampleTests.scala @@ -0,0 +1,119 @@ +package playground.smithyql.parser.v2 + +import playground.smithyql.parser.v2.scanner.TokenKind._ +import weaver._ + +object ScannerExampleTests extends SimpleIOSuite with ScannerSuite { + scanTest( + explicitName = "Real file 1", + input = + """use service demo.smithy#DemoService + | + |// CreateSubscription { + |// subscription: { + |// id: "subscription_id", + |// name: "name", + |// createdAt: "2020-04-01T00:00:00Z", + |// }, + |// } + |CreateHero { + | hero: { + | good: // bgasdfasldf + | { + | howGood: 10, + | }, + | }, + | intSet: [ + | 1, + | 2, + | 1, + | ], + |} + |""".stripMargin, + )( + List( + KW_USE("use"), + SPACE(" "), + KW_SERVICE("service"), + SPACE(" "), + IDENT("demo"), + DOT("."), + IDENT("smithy"), + HASH("#"), + IDENT("DemoService"), + NEWLINE("\n\n"), + COMMENT("// CreateSubscription {"), + NEWLINE("\n"), + COMMENT("// subscription: {"), + NEWLINE("\n"), + COMMENT("// id: \"subscription_id\","), + NEWLINE("\n"), + COMMENT("// name: \"name\","), + NEWLINE("\n"), + COMMENT("// createdAt: \"2020-04-01T00:00:00Z\","), + NEWLINE("\n"), + COMMENT("// },"), + NEWLINE("\n"), + COMMENT("// }"), + NEWLINE("\n"), + IDENT("CreateHero"), + SPACE(" "), + LBR("{"), + NEWLINE("\n"), + SPACE(" "), + IDENT("hero"), + COLON(":"), + SPACE(" "), + LBR("{"), + NEWLINE("\n"), + SPACE(" "), + IDENT("good"), + COLON(":"), + SPACE(" "), + COMMENT("// bgasdfasldf"), + NEWLINE("\n"), + SPACE(" "), + LBR("{"), + NEWLINE("\n"), + SPACE(" "), + IDENT("howGood"), + COLON(":"), + SPACE(" "), + LIT_NUMBER("10"), + COMMA(","), + NEWLINE("\n"), + SPACE(" "), + RBR("}"), + COMMA(","), + NEWLINE("\n"), + SPACE(" "), + RBR("}"), + COMMA(","), + NEWLINE("\n"), + SPACE(" "), + IDENT("intSet"), + COLON(":"), + SPACE(" "), + LB("["), + NEWLINE("\n"), + SPACE(" "), + LIT_NUMBER("1"), + COMMA(","), + NEWLINE("\n"), + SPACE(" "), + LIT_NUMBER("2"), + COMMA(","), + NEWLINE("\n"), + SPACE(" "), + LIT_NUMBER("1"), + COMMA(","), + NEWLINE("\n"), + SPACE(" "), + RB("]"), + COMMA(","), + NEWLINE("\n"), + RBR("}"), + NEWLINE("\n"), + ) + ) +} diff --git a/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerSuite.scala b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerSuite.scala new file mode 100644 index 00000000..4c78d373 --- /dev/null +++ b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerSuite.scala @@ -0,0 +1,72 @@ +package playground.smithyql.parser.v2 + +import cats.effect.IO +import cats.implicits._ +import org.scalacheck.Arbitrary +import org.scalacheck.Gen +import playground.Assertions +import playground.smithyql.parser.v2.scanner.Scanner +import playground.smithyql.parser.v2.scanner.Token +import weaver._ + +import Diffs._ +import Scanner.scan + +trait ScannerSuite { self: IOSuite => + + protected def arbTests( + name: TestName + )( + withArb: Arbitrary[String] => IO[Expectations] + ): Unit = { + + val sampleStringGen = Gen.oneOf( + Gen.alphaStr, + Gen.alphaNumStr, + Gen.asciiPrintableStr, + Gen.identifier, + Gen.oneOf(List(' ', '\n', '\t', '\r', '\f', '\b')).map(_.toString), + ) + + val arbString: Arbitrary[String] = Arbitrary { + Gen.listOf(sampleStringGen).map(_.mkString) + } + + test(name)(withArb(Arbitrary.arbString)) + test(name.copy(name = name.name + " (prepared input)"))(withArb(arbString)) + } + + protected def scanTest( + input: String, + explicitName: TestName = "", + )( + expected: List[Token] + )( + implicit loc: SourceLocation + ): Unit = + pureTest( + if (explicitName.name.nonEmpty) + explicitName + else + "Scan string: " + sanitize(input) + ) { + Assertions.assertNoDiff(scan(input), expected) + } + + // Runs scanTest by first rendering the expected tokens to a string, then scanning it to get them back. + // If the output is not the same as the input, the test fails. + // While it's guaranteed that rendering tokens to text produces scannable code (everything is scannable), + // due to ambiguities in the scanner it's not guaranteed that the output will be the same as the input - hence the need to test. + protected def scanTestReverse( + explicitName: String + )( + expected: List[Token] + )( + implicit loc: SourceLocation + ): Unit = scanTest(expected.foldMap(_.text), explicitName)(expected) + + protected def sanitize( + text: String + ) = text.replace(" ", "·").replace("\n", "↵") + +} diff --git a/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala index 6bbb3d94..7009f55f 100644 --- a/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala +++ b/modules/parser/src/test/scala/playground/smithyql/parser/v2/ScannerTests.scala @@ -1,48 +1,19 @@ package playground.smithyql.parser.v2 import cats.Show -import cats.effect.IO import cats.implicits._ import cats.parse.Numbers -import com.softwaremill.diffx.Diff import org.scalacheck.Arbitrary -import org.scalacheck.Gen import playground.Assertions import playground.smithyql.parser.v2.scanner.Scanner -import playground.smithyql.parser.v2.scanner.Token -import playground.smithyql.parser.v2.scanner.TokenKind import playground.smithyql.parser.v2.scanner.TokenKind._ import weaver._ import weaver.scalacheck.Checkers +import Diffs._ import Scanner.scan -object ScannerTests extends SimpleIOSuite with Checkers { - - implicit val tokenKindDiff: Diff[TokenKind] = Diff.derived - implicit val tokenDiff: Diff[Token] = Diff.derived - - def arbTests( - name: TestName - )( - withArb: Arbitrary[String] => IO[Expectations] - ): Unit = { - - val sampleStringGen = Gen.oneOf( - Gen.alphaStr, - Gen.alphaNumStr, - Gen.asciiPrintableStr, - Gen.identifier, - Gen.oneOf(List(' ', '\n', '\t', '\r', '\f', '\b')).map(_.toString), - ) - - val arbString: Arbitrary[String] = Arbitrary { - Gen.listOf(sampleStringGen).map(_.mkString) - } - - test(name)(withArb(Arbitrary.arbString)) - test(name.copy(name = name.name + " (prepared input)"))(withArb(arbString)) - } +object ScannerTests extends SimpleIOSuite with Checkers with ScannerSuite { arbTests("Any string input scans successfully") { implicit arbString => forall { (s: String) => @@ -56,40 +27,6 @@ object ScannerTests extends SimpleIOSuite with Checkers { assert.eql(scan(s).foldMap(_.text), s) } } - - private def scanTest( - input: String, - explicitName: TestName = "", - )( - expected: List[Token] - )( - implicit loc: SourceLocation - ): Unit = - pureTest( - if (explicitName.name.nonEmpty) - explicitName - else - "Scan string: " + sanitize(input) - ) { - Assertions.assertNoDiff(scan(input), expected) - } - - // Runs scanTest by first rendering the expected tokens to a string, then scanning it to get them back. - // If the output is not the same as the input, the test fails. - // While it's guaranteed that rendering tokens to text produces scannable code (everything is scannable), - // due to ambiguities in the scanner it's not guaranteed that the output will be the same as the input - hence the need to test. - private def scanTestReverse( - explicitName: String - )( - expected: List[Token] - )( - implicit loc: SourceLocation - ): Unit = scanTest(expected.foldMap(_.text), explicitName)(expected) - - private def sanitize( - text: String - ) = text.replace(" ", "·").replace("\n", "↵") - scanTest("{")(List(LBR("{"))) scanTest("}")(List(RBR("}"))) scanTest("[")(List(LB("["))) @@ -397,119 +334,4 @@ object ScannerTests extends SimpleIOSuite with Checkers { LIT_STRING("\"hello\nworld\"") ) ) - - // real files - - scanTest( - explicitName = "Real file 1", - input = - """use service demo.smithy#DemoService - | - |// CreateSubscription { - |// subscription: { - |// id: "subscription_id", - |// name: "name", - |// createdAt: "2020-04-01T00:00:00Z", - |// }, - |// } - |CreateHero { - | hero: { - | good: // bgasdfasldf - | { - | howGood: 10, - | }, - | }, - | intSet: [ - | 1, - | 2, - | 1, - | ], - |} - |""".stripMargin, - )( - List( - KW_USE("use"), - SPACE(" "), - KW_SERVICE("service"), - SPACE(" "), - IDENT("demo"), - DOT("."), - IDENT("smithy"), - HASH("#"), - IDENT("DemoService"), - NEWLINE("\n\n"), - COMMENT("// CreateSubscription {"), - NEWLINE("\n"), - COMMENT("// subscription: {"), - NEWLINE("\n"), - COMMENT("// id: \"subscription_id\","), - NEWLINE("\n"), - COMMENT("// name: \"name\","), - NEWLINE("\n"), - COMMENT("// createdAt: \"2020-04-01T00:00:00Z\","), - NEWLINE("\n"), - COMMENT("// },"), - NEWLINE("\n"), - COMMENT("// }"), - NEWLINE("\n"), - IDENT("CreateHero"), - SPACE(" "), - LBR("{"), - NEWLINE("\n"), - SPACE(" "), - IDENT("hero"), - COLON(":"), - SPACE(" "), - LBR("{"), - NEWLINE("\n"), - SPACE(" "), - IDENT("good"), - COLON(":"), - SPACE(" "), - COMMENT("// bgasdfasldf"), - NEWLINE("\n"), - SPACE(" "), - LBR("{"), - NEWLINE("\n"), - SPACE(" "), - IDENT("howGood"), - COLON(":"), - SPACE(" "), - LIT_NUMBER("10"), - COMMA(","), - NEWLINE("\n"), - SPACE(" "), - RBR("}"), - COMMA(","), - NEWLINE("\n"), - SPACE(" "), - RBR("}"), - COMMA(","), - NEWLINE("\n"), - SPACE(" "), - IDENT("intSet"), - COLON(":"), - SPACE(" "), - LB("["), - NEWLINE("\n"), - SPACE(" "), - LIT_NUMBER("1"), - COMMA(","), - NEWLINE("\n"), - SPACE(" "), - LIT_NUMBER("2"), - COMMA(","), - NEWLINE("\n"), - SPACE(" "), - LIT_NUMBER("1"), - COMMA(","), - NEWLINE("\n"), - SPACE(" "), - RB("]"), - COMMA(","), - NEWLINE("\n"), - RBR("}"), - NEWLINE("\n"), - ) - ) }