Skip to content

Commit

Permalink
feat: Rust-style raw string literals
Browse files Browse the repository at this point in the history
For example, `r#"The word "this" is in quotes."#`.

Implements #1422
  • Loading branch information
kmill committed Nov 27, 2023
1 parent 190ac50 commit 04fbe9a
Show file tree
Hide file tree
Showing 6 changed files with 106 additions and 2 deletions.
18 changes: 17 additions & 1 deletion doc/lexical_structure.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ A Lean program consists of a stream of UTF-8 tokens where each token
is one of the following:

```
token: symbol | command | ident | string | char | numeral |
token: symbol | command | ident | string | raw_string | char | numeral |
: decimal | doc_comment | mod_doc_comment | field_notation
```

Expand Down Expand Up @@ -90,6 +90,22 @@ So the complete syntax is:
hex_char : [0-9a-fA-F]
```

Raw String Literals
===================

Raw string literals are string literals without any escape character processing.
They begin with `r##...#"` (with zero or more `#` characters) and end with `"#...##` (with the same number of `#` characters).
The contents of a raw string literal may contain `"##..#` so long as the number of `#` characters
is less than the number of `#` characters used to begin the raw string literal.

```
raw_string : raw_string_aux(0) | raw_string_aux(1) | raw_string_aux(2) | ...
raw_string_aux(n) : 'r' '#'{n} '"' raw_string_item '"' '#'{n}
raw_string_item(n) : raw_string_char | raw_string_quote(n)
raw_string_char : [^"]
raw_string_quote(n) : '"' '#'{0..n-1}
```

Char Literals
=============

Expand Down
13 changes: 12 additions & 1 deletion src/Init/Meta.lean
Original file line number Diff line number Diff line change
Expand Up @@ -786,8 +786,19 @@ partial def decodeStrLitAux (s : String) (i : String.Pos) (acc : String) : Optio
else
decodeStrLitAux s i (acc.push c)

partial def decodeRawStrLitAux (s : String) (i : String.Pos) (num : Nat) : Option String := do
let c := s.get i
let i := s.next i
if c == '#' then
decodeRawStrLitAux s i (num + 1)
else
s.extract i ⟨s.utf8ByteSize - (num + 1)⟩

def decodeStrLit (s : String) : Option String :=
decodeStrLitAux s ⟨1""
if s.get 0 == 'r' then
decodeRawStrLitAux s ⟨10
else
decodeStrLitAux s ⟨1""

def isStrLit? (stx : Syntax) : Option String :=
match isLit? strLitKind stx with
Expand Down
59 changes: 59 additions & 0 deletions src/Lean/Parser/Basic.lean
Original file line number Diff line number Diff line change
Expand Up @@ -627,6 +627,63 @@ partial def strLitFnAux (startPos : String.Pos) : ParserFn := fun c s =>
else if curr == '\\' then andthenFn quotedCharFn (strLitFnAux startPos) c s
else strLitFnAux startPos c s

/-- Raw strings have the syntax `r##...#"..."#...##` with zero or more `#`'s.
If this is a valid start to a raw string (`r##...#"`),
returns the parser state after the `"` along with the number of `#`'s -/
partial def rawStrLitStart (c : ParserContext) (s : ParserState) : Option (ParserState × Nat) :=
let input := c.input
let i := s.pos
if h : input.atEnd i then none
else
let curr := input.get' i h
if curr == 'r' then
loop (s.next' input i h) 0
else
none
where
loop (s : ParserState) (num : Nat) : Option (ParserState × Nat) :=
let input := c.input
let i := s.pos
if h : input.atEnd i then none
else
let curr := input.get' i h
if curr == '#' then
loop (s.next' input i h) (num + 1)
else if curr == '"' then
some (s.next' input i h, num)
else
none

/-- Parses a raw string literal after `rawStrLitStart` succeeds.
The `startPos` is the start of the raw string (at the `r`).
The number of hashes is `num`.
The parser state is assumed to be immediately after the first `"`.
If `closing?` is `some closingNum` then we are currently looking at `"##...#` with `closingNum` hashes. -/
partial def rawStrLitFnAux (startPos : String.Pos) (num : Nat) (closing? : Option Nat) : ParserFn := fun c s =>
let input := c.input
let i := s.pos
if h : input.atEnd i then s.mkUnexpectedErrorAt "unterminated raw string literal" startPos
else
let curr := input.get' i h
let s := s.setPos (input.next' i h)
if let some closingNum := closing? then
if curr == '#' then
if closingNum + 1 == num then
mkNodeToken strLitKind startPos c s
else
rawStrLitFnAux startPos num (some <| closingNum + 1) c s
else if curr == '\"' then
rawStrLitFnAux startPos num (some 0) c s
else
rawStrLitFnAux startPos num none c s
else if curr == '\"' then
if num == 0 then
mkNodeToken strLitKind startPos c s
else
rawStrLitFnAux startPos num (some 0) c s
else
rawStrLitFnAux startPos num none c s

def decimalNumberFn (startPos : String.Pos) (c : ParserContext) : ParserState → ParserState := fun s =>
let s := takeWhileFn (fun c => c.isDigit) c s
let input := c.input
Expand Down Expand Up @@ -820,6 +877,8 @@ private def tokenFnAux : ParserFn := fun c s =>
numberFnAux c s
else if curr == '`' && isIdFirstOrBeginEscape (getNext input i) then
nameLitAux i c s
else if let some (s, num) := rawStrLitStart c s then
rawStrLitFnAux i num none c s
else
let tk := c.tokens.matchPrefix input i
identFnAux i tk .anonymous c s
Expand Down
1 change: 1 addition & 0 deletions tests/lean/rawStringEOF.lean
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#check r###"this is a raw string, unterminated"##
1 change: 1 addition & 0 deletions tests/lean/rawStringEOF.lean.expected.out
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
rawStringEOF.lean:1:7: error: unterminated raw string literal
16 changes: 16 additions & 0 deletions tests/lean/run/rawStrings.lean
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
example : r"" = "" := rfl
example : r#""# = "" := rfl
example : r"hi" = "hi" := rfl
example : r#"hi"# = "hi" := rfl
example : r#""hello""# = "\"hello\"" := rfl
example : r#""hello" said the world"# = "\"hello\" said the world" := rfl
example : r#"""# = "\"" := rfl
example : r"\n" = "\\n" := rfl
example : r"\" = "\\" := rfl
example : r#"#"# = "#" := rfl
example : r##"need two #'s in "# to close"## = "need two #'s in \"# to close" := rfl
example : r##"foo #"# bar"## = "foo #\"# bar" := rfl

infix:100 " # " => Prod.mk

example : r#"a"##"b" = ("a", "b") := rfl

0 comments on commit 04fbe9a

Please sign in to comment.