From e9a319a43fa3bf1c9304bbe67e2b81c246d59672 Mon Sep 17 00:00:00 2001 From: Robert Mosolgo Date: Tue, 20 Jun 2023 16:32:49 -0400 Subject: [PATCH] Use utf-8 encoding for static strings in c lexer --- graphql-c_parser/ext/graphql_c_parser_ext/lexer.c | 6 +++--- graphql-c_parser/ext/graphql_c_parser_ext/lexer.rl | 6 +++--- spec/graphql/language/lexer_examples.rb | 6 ++++++ 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/graphql-c_parser/ext/graphql_c_parser_ext/lexer.c b/graphql-c_parser/ext/graphql_c_parser_ext/lexer.c index 1433f8993b..e0f0c1f2d5 100644 --- a/graphql-c_parser/ext/graphql_c_parser_ext/lexer.c +++ b/graphql-c_parser/ext/graphql_c_parser_ext/lexer.c @@ -868,7 +868,7 @@ void emit(TokenType tt, char *ts, char *te, Meta *meta) { token_sym = ID2SYM(rb_intern("STRING")); quotes_length = 3; token_content = rb_utf8_str_new(ts + quotes_length, (te - ts - (2 * quotes_length))); - line_incr = FIX2INT(rb_funcall(token_content, rb_intern("count"), 1, rb_str_new_cstr("\n"))); + line_incr = FIX2INT(rb_funcall(token_content, rb_intern("count"), 1, rb_utf8_str_new_cstr("\n"))); break; case STRING: // This is used only by the parser, this is never reached @@ -1976,12 +1976,12 @@ VALUE tokenize(VALUE query_rbstr) { #define SETUP_STATIC_TOKEN_VARIABLE(token_name, token_content) \ -GraphQLTokenString##token_name = rb_str_new_cstr(token_content); \ +GraphQLTokenString##token_name = rb_utf8_str_new_cstr(token_content); \ rb_funcall(GraphQLTokenString##token_name, rb_intern("-@"), 0); \ rb_global_variable(&GraphQLTokenString##token_name); \ #define SETUP_STATIC_STRING(var_name, str_content) \ -var_name = rb_str_new_cstr(str_content); \ +var_name = rb_utf8_str_new_cstr(str_content); \ rb_global_variable(&var_name); \ rb_str_freeze(var_name); \ diff --git a/graphql-c_parser/ext/graphql_c_parser_ext/lexer.rl b/graphql-c_parser/ext/graphql_c_parser_ext/lexer.rl index b3f36024ea..edadb4b4d1 100644 --- a/graphql-c_parser/ext/graphql_c_parser_ext/lexer.rl +++ b/graphql-c_parser/ext/graphql_c_parser_ext/lexer.rl @@ -282,7 +282,7 @@ void emit(TokenType tt, char *ts, char *te, Meta *meta) { token_sym = ID2SYM(rb_intern("STRING")); quotes_length = 3; token_content = rb_utf8_str_new(ts + quotes_length, (te - ts - (2 * quotes_length))); - line_incr = FIX2INT(rb_funcall(token_content, rb_intern("count"), 1, rb_str_new_cstr("\n"))); + line_incr = FIX2INT(rb_funcall(token_content, rb_intern("count"), 1, rb_utf8_str_new_cstr("\n"))); break; case STRING: // This is used only by the parser, this is never reached @@ -359,12 +359,12 @@ VALUE tokenize(VALUE query_rbstr) { #define SETUP_STATIC_TOKEN_VARIABLE(token_name, token_content) \ - GraphQLTokenString##token_name = rb_str_new_cstr(token_content); \ + GraphQLTokenString##token_name = rb_utf8_str_new_cstr(token_content); \ rb_funcall(GraphQLTokenString##token_name, rb_intern("-@"), 0); \ rb_global_variable(&GraphQLTokenString##token_name); \ #define SETUP_STATIC_STRING(var_name, str_content) \ - var_name = rb_str_new_cstr(str_content); \ + var_name = rb_utf8_str_new_cstr(str_content); \ rb_global_variable(&var_name); \ rb_str_freeze(var_name); \ diff --git a/spec/graphql/language/lexer_examples.rb b/spec/graphql/language/lexer_examples.rb index 02a78839db..ad264e991b 100644 --- a/spec/graphql/language/lexer_examples.rb +++ b/spec/graphql/language/lexer_examples.rb @@ -58,6 +58,12 @@ def self.included(child_mod) assert_equal :BAD_UNICODE_ESCAPE, subject.tokenize(string).first.name end + it "makes utf-8 arguments named type" do + str = "{ a(type: 1) }" + tokens = subject.tokenize(str) + assert_equal Encoding::UTF_8, tokens[2].value.encoding + end + it "makes utf-8 comments" do tokens = subject.tokenize("# 不要!\n{") comment_token = tokens.first.prev_token