Skip to content

Commit

Permalink
Merge pull request #4526 from rmosolgo/utf8-static-tokens
Browse files Browse the repository at this point in the history
Use UTF-8 encoding for static strings in the C lexer
  • Loading branch information
rmosolgo authored Jun 21, 2023
2 parents d8feb07 + e9a319a commit 5e001ff
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 6 deletions.
6 changes: 3 additions & 3 deletions graphql-c_parser/ext/graphql_c_parser_ext/lexer.c
Original file line number Diff line number Diff line change
Expand Up @@ -868,7 +868,7 @@ void emit(TokenType tt, char *ts, char *te, Meta *meta) {
token_sym = ID2SYM(rb_intern("STRING"));
quotes_length = 3;
token_content = rb_utf8_str_new(ts + quotes_length, (te - ts - (2 * quotes_length)));
line_incr = FIX2INT(rb_funcall(token_content, rb_intern("count"), 1, rb_str_new_cstr("\n")));
line_incr = FIX2INT(rb_funcall(token_content, rb_intern("count"), 1, rb_utf8_str_new_cstr("\n")));
break;
case STRING:
// This is used only by the parser, this is never reached
Expand Down Expand Up @@ -1976,12 +1976,12 @@ VALUE tokenize(VALUE query_rbstr) {


#define SETUP_STATIC_TOKEN_VARIABLE(token_name, token_content) \
GraphQLTokenString##token_name = rb_str_new_cstr(token_content); \
GraphQLTokenString##token_name = rb_utf8_str_new_cstr(token_content); \
rb_funcall(GraphQLTokenString##token_name, rb_intern("-@"), 0); \
rb_global_variable(&GraphQLTokenString##token_name); \

#define SETUP_STATIC_STRING(var_name, str_content) \
var_name = rb_str_new_cstr(str_content); \
var_name = rb_utf8_str_new_cstr(str_content); \
rb_global_variable(&var_name); \
rb_str_freeze(var_name); \

Expand Down
6 changes: 3 additions & 3 deletions graphql-c_parser/ext/graphql_c_parser_ext/lexer.rl
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ void emit(TokenType tt, char *ts, char *te, Meta *meta) {
token_sym = ID2SYM(rb_intern("STRING"));
quotes_length = 3;
token_content = rb_utf8_str_new(ts + quotes_length, (te - ts - (2 * quotes_length)));
line_incr = FIX2INT(rb_funcall(token_content, rb_intern("count"), 1, rb_str_new_cstr("\n")));
line_incr = FIX2INT(rb_funcall(token_content, rb_intern("count"), 1, rb_utf8_str_new_cstr("\n")));
break;
case STRING:
// This is used only by the parser, this is never reached
Expand Down Expand Up @@ -359,12 +359,12 @@ VALUE tokenize(VALUE query_rbstr) {


#define SETUP_STATIC_TOKEN_VARIABLE(token_name, token_content) \
GraphQLTokenString##token_name = rb_str_new_cstr(token_content); \
GraphQLTokenString##token_name = rb_utf8_str_new_cstr(token_content); \
rb_funcall(GraphQLTokenString##token_name, rb_intern("-@"), 0); \
rb_global_variable(&GraphQLTokenString##token_name); \

#define SETUP_STATIC_STRING(var_name, str_content) \
var_name = rb_str_new_cstr(str_content); \
var_name = rb_utf8_str_new_cstr(str_content); \
rb_global_variable(&var_name); \
rb_str_freeze(var_name); \

Expand Down
6 changes: 6 additions & 0 deletions spec/graphql/language/lexer_examples.rb
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,12 @@ def self.included(child_mod)
assert_equal :BAD_UNICODE_ESCAPE, subject.tokenize(string).first.name
end

# Tokenizes a query that uses `type` as an argument name and checks that the
# inspected token's value is tagged with UTF-8 encoding.
it "makes utf-8 arguments named type" do
str = "{ a(type: 1) }"
tokens = subject.tokenize(str)
# NOTE(review): counting tokens as `{`, `a`, `(`, `type`, ... index 2 looks
# like the `(` token rather than `type` — confirm which token this is meant
# to cover (index 3 may have been intended).
assert_equal Encoding::UTF_8, tokens[2].value.encoding
end

it "makes utf-8 comments" do
tokens = subject.tokenize("# 不要!\n{")
comment_token = tokens.first.prev_token
Expand Down

0 comments on commit 5e001ff

Please sign in to comment.