From 86c34f93b1693dae9d645dda62791fa3010a9235 Mon Sep 17 00:00:00 2001 From: Masataka Pocke Kuwabara Date: Fri, 5 Apr 2024 17:29:32 +0900 Subject: [PATCH 1/4] Omit unnecessary field from location range --- ext/rbs_extension/location.c | 61 ++++++++++++++++++------------------ ext/rbs_extension/location.h | 9 ++++-- 2 files changed, 37 insertions(+), 33 deletions(-) diff --git a/ext/rbs_extension/location.c b/ext/rbs_extension/location.c index 3f6f70190..edbe7b8ef 100644 --- a/ext/rbs_extension/location.c +++ b/ext/rbs_extension/location.c @@ -3,7 +3,9 @@ #define RBS_LOC_REQUIRED_P(loc, i) ((loc)->children->required_p & (1 << (i))) #define RBS_LOC_OPTIONAL_P(loc, i) (!RBS_LOC_REQUIRED_P((loc), (i))) #define RBS_LOC_CHILDREN_SIZE(cap) (sizeof(rbs_loc_children) + sizeof(rbs_loc_entry) * ((cap) - 1)) +#define NULL_LOC_RANGE_P(rg) ((rg).start == -1) +rbs_loc_range RBS_LOC_NULL_RANGE = { -1, -1 }; VALUE RBS_Location; position rbs_loc_position(int char_pos) { @@ -16,6 +18,11 @@ position rbs_loc_position3(int char_pos, int line, int column) { return pos; } +rbs_loc_range rbs_new_loc_range(range rg) { + rbs_loc_range r = { rg.start.char_pos, rg.end.char_pos }; + return r; +} + static void check_children_max(unsigned short n) { size_t max = sizeof(rbs_loc_entry_bitmap) * 8; if (n > max) { @@ -51,7 +58,7 @@ void rbs_loc_add_required_child(rbs_loc *loc, ID name, range r) { unsigned short i = loc->children->len++; loc->children->entries[i].name = name; - loc->children->entries[i].rg = r; + loc->children->entries[i].rg = rbs_new_loc_range(r); loc->children->required_p |= 1 << i; } @@ -61,10 +68,10 @@ void rbs_loc_add_optional_child(rbs_loc *loc, ID name, range r) { unsigned short i = loc->children->len++; loc->children->entries[i].name = name; - loc->children->entries[i].rg = r; + loc->children->entries[i].rg = rbs_new_loc_range(r); } -void rbs_loc_init(rbs_loc *loc, VALUE buffer, range rg) { +void rbs_loc_init(rbs_loc *loc, VALUE buffer, rbs_loc_range rg) { loc->buffer = buffer; loc->rg = rg; loc->children = NULL; @@ -100,7 +107,7 @@ static VALUE location_s_allocate(VALUE klass) { rbs_loc *loc; VALUE obj = TypedData_Make_Struct(klass, rbs_loc, &location_type, loc); - rbs_loc_init(loc, Qnil, NULL_RANGE); + rbs_loc_init(loc, Qnil, RBS_LOC_NULL_RANGE); return obj; } @@ -112,8 +119,8 @@ rbs_loc *rbs_check_location(VALUE obj) { static VALUE location_initialize(VALUE self, VALUE buffer, VALUE start_pos, VALUE end_pos) { rbs_loc *loc = rbs_check_location(self); - position start = rbs_loc_position(FIX2INT(start_pos)); - position end = rbs_loc_position(FIX2INT(end_pos)); + int start = FIX2INT(start_pos); + int end = FIX2INT(end_pos); loc->buffer = buffer; loc->rg.start = start; @@ -143,38 +150,21 @@ static VALUE location_buffer(VALUE self) { static VALUE location_start_pos(VALUE self) { rbs_loc *loc = rbs_check_location(self); - return INT2FIX(loc->rg.start.char_pos); + return INT2FIX(loc->rg.start); } static VALUE location_end_pos(VALUE self) { rbs_loc *loc = rbs_check_location(self); - return INT2FIX(loc->rg.end.char_pos); + return INT2FIX(loc->rg.end); } +// TODO: remove it static VALUE location_start_loc(VALUE self) { - rbs_loc *loc = rbs_check_location(self); - - if (loc->rg.start.line >= 0) { - VALUE pair = rb_ary_new_capa(2); - rb_ary_push(pair, INT2FIX(loc->rg.start.line)); - rb_ary_push(pair, INT2FIX(loc->rg.start.column)); - return pair; - } else { - return Qnil; - } + return Qnil; } static VALUE location_end_loc(VALUE self) { - rbs_loc *loc = rbs_check_location(self); - - if (loc->rg.end.line >= 0) { - VALUE pair = rb_ary_new_capa(2); - rb_ary_push(pair, INT2FIX(loc->rg.end.line)); - rb_ary_push(pair, INT2FIX(loc->rg.end.column)); - return pair; - } else { - return Qnil; - } + return Qnil; } static VALUE location_add_required_child(VALUE self, VALUE name, VALUE start, VALUE end) { @@ -213,6 +203,15 @@ VALUE rbs_new_location(VALUE buffer, range rg) { rbs_loc *loc; VALUE obj = TypedData_Make_Struct(RBS_Location, rbs_loc, &location_type, loc); + rbs_loc_init(loc, buffer, rbs_new_loc_range(rg)); + + return obj; +} + +static VALUE rbs_new_location_from_loc_range(VALUE buffer, rbs_loc_range rg) { + rbs_loc *loc; + VALUE obj = TypedData_Make_Struct(RBS_Location, rbs_loc, &location_type, loc); + rbs_loc_init(loc, buffer, rg); return obj; @@ -226,12 +225,12 @@ static VALUE location_aref(VALUE self, VALUE name) { if (loc->children != NULL) { for (unsigned short i = 0; i < loc->children->len; i++) { if (loc->children->entries[i].name == id) { - range result = loc->children->entries[i].rg; + rbs_loc_range result = loc->children->entries[i].rg; - if (RBS_LOC_OPTIONAL_P(loc, i) && null_range_p(result)) { + if (RBS_LOC_OPTIONAL_P(loc, i) && NULL_LOC_RANGE_P(result)) { return Qnil; } else { - return rbs_new_location(loc->buffer, result); + return rbs_new_location_from_loc_range(loc->buffer, result); } } } diff --git a/ext/rbs_extension/location.h b/ext/rbs_extension/location.h index b8d1ddf14..7dce22ea1 100644 --- a/ext/rbs_extension/location.h +++ b/ext/rbs_extension/location.h @@ -9,9 +9,14 @@ * */ extern VALUE RBS_Location; +typedef struct { + int start; + int end; +} rbs_loc_range; + typedef struct { ID name; - range rg; + rbs_loc_range rg; } rbs_loc_entry; typedef unsigned int rbs_loc_entry_bitmap; @@ -27,7 +32,7 @@ typedef struct { typedef struct { VALUE buffer; - range rg; + rbs_loc_range rg; rbs_loc_children *children; // NULL when no children is allocated } rbs_loc; From 16006c2c8cce5d29995936c2ca507be0268aba97 Mon Sep 17 00:00:00 2001 From: Masataka Pocke Kuwabara Date: Thu, 5 Sep 2024 15:53:36 +0900 Subject: [PATCH 2/4] EOF token is zero length --- ext/rbs_extension/lexer.c | 2 +- ext/rbs_extension/lexer.h | 5 +++++ ext/rbs_extension/lexer.re | 2 +- ext/rbs_extension/lexstate.c | 16 ++++++++++++++++ lib/rbs/errors.rb | 2 +- test/rbs/cli_test.rb | 2 +- test/rbs/parser_test.rb | 2 +- 7 files changed, 26 insertions(+), 5 deletions(-) diff --git a/ext/rbs_extension/lexer.c b/ext/rbs_extension/lexer.c index 81a9e36a4..73984cf84 100644 --- a/ext/rbs_extension/lexer.c +++ b/ext/rbs_extension/lexer.c @@ -116,7 +116,7 @@ token rbsparser_next_token(lexstate *state) { yy1: rbs_skip(state); #line 144 "ext/rbs_extension/lexer.re" - { return next_token(state, pEOF); } + { return next_eof_token(state); } #line 121 "ext/rbs_extension/lexer.c" yy2: rbs_skip(state); diff --git a/ext/rbs_extension/lexer.h b/ext/rbs_extension/lexer.h index c426647f2..55a8a3994 100644 --- a/ext/rbs_extension/lexer.h +++ b/ext/rbs_extension/lexer.h @@ -167,6 +167,11 @@ void skipn(lexstate *state, size_t size); * */ token next_token(lexstate *state, enum TokenType type); +/** + * Return new token with EOF type. + * */ +token next_eof_token(lexstate *state); + token rbsparser_next_token(lexstate *state); void print_token(token tok); diff --git a/ext/rbs_extension/lexer.re b/ext/rbs_extension/lexer.re index cfd84f8e8..aa1b94746 100644 --- a/ext/rbs_extension/lexer.re +++ b/ext/rbs_extension/lexer.re @@ -141,7 +141,7 @@ token rbsparser_next_token(lexstate *state) { skip = ([ \t]+|[\r\n]); skip { return next_token(state, tTRIVIA); } - "\x00" { return next_token(state, pEOF); } + "\x00" { return next_eof_token(state); } * { return next_token(state, ErrorToken); } */ } diff --git a/ext/rbs_extension/lexstate.c b/ext/rbs_extension/lexstate.c index b6cb95a07..ed32fd06a 100644 --- a/ext/rbs_extension/lexstate.c +++ b/ext/rbs_extension/lexstate.c @@ -129,6 +129,22 @@ token next_token(lexstate *state, enum TokenType type) { return t; } +token next_eof_token(lexstate *state) { + if (state->current.byte_pos == RSTRING_LEN(state->string)+1) { + // End of String + token t; + t.type = pEOF; + t.range.start = state->start; + t.range.end = state->start; + state->start = state->current; + + return t; + } else { + // NULL byte in the middle of the string + return next_token(state, pEOF); + } +} + void rbs_skip(lexstate *state) { if (!state->last_char) { peek(state); diff --git a/lib/rbs/errors.rb b/lib/rbs/errors.rb index 32bdba46f..ea0373699 100644 --- a/lib/rbs/errors.rb +++ b/lib/rbs/errors.rb @@ -35,7 +35,7 @@ def detailed_message(highlight: false, **) return msg unless location.start_line == location.end_line indent = " " * location.start_column - marker = "^" * (location.end_column - location.start_column) + marker = "^" * ([location.end_column - location.start_column, 1].max or raise) io = StringIO.new io.puts msg diff --git a/test/rbs/cli_test.rb b/test/rbs/cli_test.rb index 815c624c3..f8701045c 100644 --- a/test/rbs/cli_test.rb +++ b/test/rbs/cli_test.rb @@ -912,7 +912,7 @@ def test_parse_method_type assert_raises(SystemExit) { cli.run(['parse', '--method-type', '-e', '()']) } assert_equal [ - "-e:1:2...1:3: Syntax error: expected a token `pARROW`, token=`` (pEOF) (RBS::ParsingError)", + "-e:1:2...1:2: Syntax error: expected a token `pARROW`, token=`` (pEOF) (RBS::ParsingError)", "", " ()", " ^" diff --git a/test/rbs/parser_test.rb b/test/rbs/parser_test.rb index ea0e9bcd2..adfde5f74 100644 --- a/test/rbs/parser_test.rb +++ b/test/rbs/parser_test.rb @@ -834,6 +834,6 @@ class Foo[T < Integer] < Bar # Comment assert_equal [:tTRIVIA, "\n", 52...53], tokens.shift.then { |t| [t[0], t[1].source, t[1].range] } assert_equal [:kEND, 'end', 53...56], tokens.shift.then { |t| [t[0], t[1].source, t[1].range] } assert_equal [:tTRIVIA, "\n", 56...57], tokens.shift.then { |t| [t[0], t[1].source, t[1].range] } - assert_equal [:pEOF, '', 57...58], tokens.shift.then { |t| [t[0], t[1].source, t[1].range] } + assert_equal [:pEOF, '', 57...57], tokens.shift.then { |t| [t[0], t[1].source, t[1].range] } end end From 81b44e1de4fd1f9b737b8d8cd8aa660b17bb952c Mon Sep 17 00:00:00 2001 From: Masataka Pocke Kuwabara Date: Thu, 5 Sep 2024 15:58:40 +0900 Subject: [PATCH 3/4] Fix line number on non-newline terminated string --- lib/rbs/buffer.rb | 5 +++++ test/rbs/buffer_test.rb | 30 ++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/lib/rbs/buffer.rb b/lib/rbs/buffer.rb index 10f0273df..c5f48dcfe 100644 --- a/lib/rbs/buffer.rb +++ b/lib/rbs/buffer.rb @@ -25,6 +25,11 @@ def ranges @ranges << range offset += size end + + if !content.end_with?("\n") && content.size > 0 + @ranges[-1] = @ranges[-1].begin...(@ranges[-1].end+1) + end + @ranges end end diff --git a/test/rbs/buffer_test.rb b/test/rbs/buffer_test.rb index 58a36b5f7..a3fbc01c6 100644 --- a/test/rbs/buffer_test.rb +++ b/test/rbs/buffer_test.rb @@ -37,4 +37,34 @@ def test_buffer assert_equal 8, buffer.last_position end + + def test_buffer_with_no_eol + buffer = Buffer.new(name: Pathname("foo.rbs"), content: "123\nabc") + + assert_equal ["123\n", "abc"], buffer.lines + assert_equal [0...4, 4...8], buffer.ranges + + assert_equal [1, 0], buffer.pos_to_loc(0) + assert_equal [1, 1], buffer.pos_to_loc(1) + assert_equal [1, 2], buffer.pos_to_loc(2) + assert_equal [1, 3], buffer.pos_to_loc(3) + assert_equal [2, 0], buffer.pos_to_loc(4) + assert_equal [2, 1], buffer.pos_to_loc(5) + assert_equal [2, 2], buffer.pos_to_loc(6) + assert_equal [2, 3], buffer.pos_to_loc(7) + + assert_equal 0, buffer.loc_to_pos([1, 0]) + assert_equal 1, buffer.loc_to_pos([1, 1]) + assert_equal 2, buffer.loc_to_pos([1, 2]) + assert_equal 3, buffer.loc_to_pos([1, 3]) + assert_equal 4, buffer.loc_to_pos([2, 0]) + assert_equal 5, buffer.loc_to_pos([2, 1]) + assert_equal 6, buffer.loc_to_pos([2, 2]) + assert_equal 7, buffer.loc_to_pos([2, 3]) + + assert_equal "123", buffer.content[buffer.loc_to_pos([1,0])...buffer.loc_to_pos([1,3])] + assert_equal "123\n", buffer.content[buffer.loc_to_pos([1,0])...buffer.loc_to_pos([2,0])] + + assert_equal 7, buffer.last_position + end end From 553543d451c00b570cae5037a8d05254de49761a Mon Sep 17 00:00:00 2001 From: Masataka Pocke Kuwabara Date: Fri, 6 Sep 2024 11:27:34 +0900 Subject: [PATCH 4/4] Remove unnecessary _end_loc/_start_loc methods --- ext/rbs_extension/location.c | 11 ----------- lib/rbs/location_aux.rb | 8 ++------ sig/location.rbs | 3 --- 3 files changed, 2 insertions(+), 20 deletions(-) diff --git a/ext/rbs_extension/location.c b/ext/rbs_extension/location.c index edbe7b8ef..29384f442 100644 --- a/ext/rbs_extension/location.c +++ b/ext/rbs_extension/location.c @@ -158,15 +158,6 @@ static VALUE location_end_pos(VALUE self) { return INT2FIX(loc->rg.end); } -// TODO: remove it -static VALUE location_start_loc(VALUE self) { - return Qnil; -} - -static VALUE location_end_loc(VALUE self) { - return Qnil; -} - static VALUE location_add_required_child(VALUE self, VALUE name, VALUE start, VALUE end) { rbs_loc *loc = rbs_check_location(self); @@ -293,8 +284,6 @@ void rbs__init_location(void) { rb_define_method(RBS_Location, "buffer", location_buffer, 0); rb_define_method(RBS_Location, "start_pos", location_start_pos, 0); rb_define_method(RBS_Location, "end_pos", location_end_pos, 0); - rb_define_private_method(RBS_Location, "_start_loc", location_start_loc, 0); - rb_define_private_method(RBS_Location, "_end_loc", location_end_loc, 0); rb_define_method(RBS_Location, "_add_required_child", location_add_required_child, 3); rb_define_method(RBS_Location, "_add_optional_child", location_add_optional_child, 3); rb_define_method(RBS_Location, "_add_optional_no_child", location_add_optional_no_child, 1); diff --git a/lib/rbs/location_aux.rb b/lib/rbs/location_aux.rb index bfdabce92..de0413686 100644 --- a/lib/rbs/location_aux.rb +++ b/lib/rbs/location_aux.rb @@ -49,15 +49,11 @@ def end_column end def start_loc - @start_loc ||= begin - _start_loc || buffer.pos_to_loc(start_pos) - end + @start_loc ||= buffer.pos_to_loc(start_pos) end def end_loc - @end_loc ||= begin - _end_loc || buffer.pos_to_loc(end_pos) - end + @end_loc ||= buffer.pos_to_loc(end_pos) end def range diff --git a/sig/location.rbs b/sig/location.rbs index 643080b9d..f9588ddc7 100644 --- a/sig/location.rbs +++ b/sig/location.rbs @@ -99,9 +99,6 @@ module RBS private - def _start_loc: () -> Buffer::loc? - def _end_loc: () -> Buffer::loc? - def _add_required_child: (RequiredChildKeys name, Integer start_pos, Integer end_pos) -> void def _add_optional_child: (OptionalChildKeys name, Integer start_pos, Integer end_pos) -> void def _add_optional_no_child: (OptionalChildKeys name) -> void