From b701e85e94e7a957668fb93ff563fa125378b9c7 Mon Sep 17 00:00:00 2001 From: Antonio Nuno Monteiro Date: Tue, 9 Jan 2024 20:45:35 -0800 Subject: [PATCH 1/2] feat: support (OCaml syntax) expressions in `{j| .. |j}` interpolation --- jscomp/common/utf8_string.ml | 119 ++++++++++++++++------- jscomp/common/utf8_string.mli | 2 +- test/blackbox-tests/utf8-string-interp.t | 56 +++++++++++ test/unit-tests/ounit_unicode_tests.ml | 6 +- 4 files changed, 143 insertions(+), 40 deletions(-) diff --git a/jscomp/common/utf8_string.ml b/jscomp/common/utf8_string.ml index d2e8c4b9a..cc60e6c2b 100644 --- a/jscomp/common/utf8_string.ml +++ b/jscomp/common/utf8_string.ml @@ -220,8 +220,8 @@ module Interp = struct | Unmatched_paren | Invalid_syntax_of_var of string - type kind = String | Var of int * int - (* [Var (loffset, roffset)] + type kind = String | Expr of int * int + (* [Expr (loffset, roffset)] For parens it used to be (2,-1) for non-parens it used to be (1,0) *) @@ -331,51 +331,73 @@ module Interp = struct }, error )) + let empty_segment_cxt cxt = + (* Move the position back 2 characters "$(" if this is the empty + interpolation. *) + { + cxt with + segment_start = + { + cxt.segment_start with + offset = (match cxt.segment_start.offset with 0 -> 0 | n -> n - 3); + byte_bol = + (match cxt.segment_start.byte_bol with 0 -> 0 | n -> n - 3); + }; + pos_bol = cxt.pos_bol + 3; + byte_bol = cxt.byte_bol + 3; + } + let add_var_segment cxt loc loffset roffset = let content = Buffer.contents cxt.buf in Buffer.clear cxt.buf; - let next_loc = - { - lnum = cxt.pos_lnum; - offset = loc - cxt.pos_bol; - byte_bol = cxt.byte_bol; - } - in if valid_identifier content then ( + let next_loc = + { + lnum = cxt.pos_lnum; + offset = loc - cxt.pos_bol; + byte_bol = cxt.byte_bol; + } + in cxt.segments <- { start = cxt.segment_start; finish = next_loc; - kind = Var (loffset, roffset); + kind = Expr (loffset, roffset); content; } :: cxt.segments; cxt.segment_start <- next_loc) else let cxt = - match String.trim content with - | "" -> - (* Move the position back 2 characters "$(" if this is the empty - interpolation. *) - { - cxt with - segment_start = - { - cxt.segment_start with - offset = - (match cxt.segment_start.offset with 0 -> 0 | n -> n - 3); - byte_bol = - (match cxt.segment_start.byte_bol with - | 0 -> 0 - | n -> n - 3); - }; - pos_bol = cxt.pos_bol + 3; - byte_bol = cxt.byte_bol + 3; - } - | _ -> cxt + match String.trim content with "" -> empty_segment_cxt cxt | _ -> cxt in pos_error cxt ~loc (Invalid_syntax_of_var content) + let add_expr_segment cxt loc loffset roffset = + let content = Buffer.contents cxt.buf in + Buffer.clear cxt.buf; + match String.trim content with + | "" -> + let cxt = empty_segment_cxt cxt in + pos_error cxt ~loc (Invalid_syntax_of_var content) + | _ -> + let next_loc = + { + lnum = cxt.pos_lnum; + offset = loc - cxt.pos_bol; + byte_bol = cxt.byte_bol; + } + in + cxt.segments <- + { + start = cxt.segment_start; + finish = next_loc; + kind = Expr (loffset, roffset); + content; + } + :: cxt.segments; + cxt.segment_start <- next_loc + let add_str_segment cxt loc = let content = Buffer.contents cxt.buf in Buffer.clear cxt.buf; @@ -453,15 +475,25 @@ module Interp = struct and expect_var_paren loc s offset ({ buf; s_len; _ } as cxt) = let v = ref offset in - while !v < s_len && s.[!v] <> ')' do + let opening_parens = ref [] in + let stop = ref false in + while !v < s_len && not !stop do let cur_char = s.[!v] in - Buffer.add_char buf cur_char; - incr v + + (if cur_char = ')' then + match !opening_parens with + | [] -> stop := true + | _ :: rest -> opening_parens := rest); + + if not !stop then ( + if cur_char = '(' then opening_parens := cur_char :: !opening_parens; + Buffer.add_char buf cur_char; + incr v) done; let added_length = !v - offset in let loc = added_length + 1 + loc in if !v < s_len && s.[!v] = ')' then ( - add_var_segment cxt loc 2 (-1); + add_expr_segment cxt loc 2 (-1); check_and_transform loc s (added_length + 1 + offset) cxt) else pos_error cxt ~loc Unmatched_paren @@ -525,7 +557,7 @@ module Interp = struct | String -> let loc = update border start finish loc in Exp.constant (Pconst_string (content, loc, escaped_j_delimiter)) - | Var (soffset, foffset) -> + | Expr (soffset, foffset) -> ( let loc = { loc with @@ -535,7 +567,22 @@ module Interp = struct update_position (foffset + border) finish loc.loc_start; } in - Exp.ident ~loc { loc; txt = Lident content }) + let lexbuf = Lexing.from_string content in + try { (Parse.expression lexbuf) with pexp_loc = loc } + with Syntaxerr.Error err -> + let err = + match err with + | Unclosed _ -> Syntaxerr.Other loc + | Expecting (_, s) -> Expecting (loc, s) + | Not_expecting (_, s) -> Not_expecting (loc, s) + | Applicative_path _ -> Applicative_path loc + | Variable_in_scope (_, s) -> Variable_in_scope (loc, s) + | Other _ -> Other loc + | Ill_formed_ast (_, s) -> Ill_formed_ast (loc, s) + | Invalid_package_type (_, s) -> Invalid_package_type (loc, s) + | Removed_string_set _ -> Removed_string_set loc + in + raise (Syntaxerr.Error err))) in let concat_exp a_loc x ~(lhs : Parsetree.expression) = let loc = merge_loc a_loc lhs.pexp_loc in diff --git a/jscomp/common/utf8_string.mli b/jscomp/common/utf8_string.mli index 7ef67afb7..607fdf6c6 100644 --- a/jscomp/common/utf8_string.mli +++ b/jscomp/common/utf8_string.mli @@ -59,7 +59,7 @@ module Interp : sig | Unmatched_paren | Invalid_syntax_of_var of string - type kind = String | Var of int * int + type kind = String | Expr of int * int (* Note the position is about code point *) type pos = { diff --git a/test/blackbox-tests/utf8-string-interp.t b/test/blackbox-tests/utf8-string-interp.t index 908f91afa..ec5aea8f7 100644 --- a/test/blackbox-tests/utf8-string-interp.t +++ b/test/blackbox-tests/utf8-string-interp.t @@ -35,3 +35,59 @@ Error: This expression has type int but an expression was expected of type string [2] + +`{j| .. |j}` interpolation with expressions + + $ cat > x.ml < let x = + > let y = 3 in + > {j| Hello, \$(y + y |> Js.String.make)|j} + > EOF + $ melc -ppx melppx x.ml + // Generated by Melange + 'use strict'; + + + var x = " Hello, " + String(6); + + exports.x = x; + /* x Not a pure module */ + +Interpolation expression errors + + $ cat > x.ml < let x = + > let y = 3 in + > {j| Hello, \$(y + y |>)|j} + > EOF + $ melc -ppx melppx x.ml + File "x.ml", line 3, characters 15-23: + 3 | {j| Hello, $(y + y |>)|j} + ^^^^^^^^ + Error: Syntax error + [2] + + $ cat > x.ml < let x = + > let y = 3 in + > {j| Hello, \$(y + y)|j} + > EOF + $ melc -ppx melppx x.ml + File "x.ml", line 3, characters 15-20: + 3 | {j| Hello, $(y + y)|j} + ^^^^^ + Error: This expression has type int but an expression was expected of type + string + [2] + + $ cat > x.ml < let x = + > let y = 3 in + > {j| Hello, \$(y +)|j} + > EOF + $ melc -ppx melppx x.ml + File "x.ml", line 3, characters 15-18: + 3 | {j| Hello, $(y +)|j} + ^^^ + Error: Syntax error + [2] diff --git a/test/unit-tests/ounit_unicode_tests.ml b/test/unit-tests/ounit_unicode_tests.ml index f3df194cb..29ed6bfce 100644 --- a/test/unit-tests/ounit_unicode_tests.ml +++ b/test/unit-tests/ounit_unicode_tests.ml @@ -3,7 +3,7 @@ let ( =~ ) a b = OUnit.assert_equal ~cmp:String.equal a b module Utf8_string = Melange_ffi.Utf8_string -(* Note [Var] kind can not be mpty *) +(* Note [Expr] kind can not be mpty *) let empty_segment { Utf8_string.Interp.Private.content; _ } = String.length content = 0 @@ -38,8 +38,8 @@ let ( ==* ) a b = in OUnit.assert_equal segments b -let varParen : Utf8_string.Interp.kind = Var (2, -1) -let var : Utf8_string.Interp.kind = Var (1, 0) +let varParen : Utf8_string.Interp.kind = Expr (2, -1) +let var : Utf8_string.Interp.kind = Expr (1, 0) let suites = __FILE__ From dba2f981113f5721abccdd3ea2fdc0c9d7e424dd Mon Sep 17 00:00:00 2001 From: Antonio Nuno Monteiro Date: Tue, 9 Jan 2024 21:30:01 -0800 Subject: [PATCH 2/2] chore: remove / port tests that should live elsewhere --- test/blackbox-tests/utf8-string-interp.t | 12 ++++++++++ test/unit-tests/ounit_unicode_tests.ml | 29 ------------------------ 2 files changed, 12 insertions(+), 29 deletions(-) diff --git a/test/blackbox-tests/utf8-string-interp.t b/test/blackbox-tests/utf8-string-interp.t index ec5aea8f7..b7830800c 100644 --- a/test/blackbox-tests/utf8-string-interp.t +++ b/test/blackbox-tests/utf8-string-interp.t @@ -91,3 +91,15 @@ Interpolation expression errors ^^^ Error: Syntax error [2] + + $ cat > x.ml < let x = + > {j|\$(()) |j} + > EOF + $ melc -ppx melppx x.ml + File "x.ml", line 2, characters 7-9: + 2 | {j|$(()) |j} + ^^ + Error: This expression has type unit but an expression was expected of type + string + [2] diff --git a/test/unit-tests/ounit_unicode_tests.ml b/test/unit-tests/ounit_unicode_tests.ml index 29ed6bfce..a7df6b67b 100644 --- a/test/unit-tests/ounit_unicode_tests.ml +++ b/test/unit-tests/ounit_unicode_tests.ml @@ -167,15 +167,6 @@ let suites = ] ); ( __LOC__ >:: fun _ -> {|$x)|} ==* [ (0, 0, 0, 2, var, "x"); (0, 2, 0, 3, String, ")") ] ); - ( __LOC__ >:: fun _ -> - match Utf8_string.Interp.Private.transform_test {j| $( ()) |j} with - | exception - Utf8_string.Interp.Error - ( { lnum = 0; offset = 1; byte_bol = 0 }, - { lnum = 0; offset = 6; byte_bol = 0 }, - Invalid_syntax_of_var " (" ) -> - OUnit.assert_bool __LOC__ true - | _ -> OUnit.assert_bool __LOC__ false ); ( __LOC__ >:: fun _ -> match Utf8_string.Interp.Private.transform_test {|$ ()|} with | exception @@ -194,26 +185,6 @@ let suites = Invalid_syntax_of_var "" ) -> OUnit.assert_bool __LOC__ true | _ -> OUnit.assert_bool __LOC__ false ); - ( __LOC__ >:: fun _ -> - match - Utf8_string.Interp.Private.transform_test {|$(hello world)|} - with - | exception - Utf8_string.Interp.Error - ( { lnum = 0; offset = 0; byte_bol = 0 }, - { lnum = 0; offset = 14; byte_bol = 0 }, - Invalid_syntax_of_var "hello world" ) -> - OUnit.assert_bool __LOC__ true - | _ -> OUnit.assert_bool __LOC__ false ); - ( __LOC__ >:: fun _ -> - match Utf8_string.Interp.Private.transform_test {|$( hi*) |} with - | exception - Utf8_string.Interp.Error - ( { lnum = 0; offset = 0; byte_bol = 0 }, - { lnum = 0; offset = 7; byte_bol = 0 }, - Invalid_syntax_of_var " hi*" ) -> - OUnit.assert_bool __LOC__ true - | _ -> OUnit.assert_bool __LOC__ false ); ( __LOC__ >:: fun _ -> match Utf8_string.Interp.Private.transform_test {|xx $|} with | exception