From e1e258f19ea27265c122b34e835ec0f27fcf8549 Mon Sep 17 00:00:00 2001 From: Shubham Ugare Date: Sun, 5 Jan 2025 17:21:09 -0600 Subject: [PATCH] Fix issue in accept sequence --- syncode/dfa_mask_store.py | 10 ++++------ syncode/parse_result.py | 6 ------ tests/test_grammar_go.py | 2 +- 3 files changed, 5 insertions(+), 13 deletions(-) diff --git a/syncode/dfa_mask_store.py b/syncode/dfa_mask_store.py index d106373..44618e7 100644 --- a/syncode/dfa_mask_store.py +++ b/syncode/dfa_mask_store.py @@ -451,12 +451,10 @@ def _lookup_next_tokens(self, dfa_states: Iterable[DFAState], r: ParseResult) -> elif len(accept_sequence) == 2: overapprox_token_ids |= self._lookup_next_tokens_for_dfa_state(dfa_state, accept_sequence[1]) elif len(accept_sequence) == 3: - # This is useful in under-approximating `grammar_strict` mode as they help improve the precision of SynCode - if self._mode == 'grammar_strict': - # If the DFA state is a final state we can jump to the start of next terminal - if self._dfas.is_final(dfa_state): - ignore_init_state = self._dfas.initial(accept_sequence[1]) - overapprox_token_ids |= self._lookup_next_tokens_for_dfa_state(ignore_init_state, accept_sequence[2]) + # If the DFA state is a final state we can jump to the start of next terminal + if self._dfas.is_final(dfa_state): + ignore_init_state = self._dfas.initial(accept_sequence[1]) + overapprox_token_ids |= self._lookup_next_tokens_for_dfa_state(ignore_init_state, accept_sequence[2]) else: raise ValueError(f"Invalid accept sequence: {accept_sequence}") return overapprox_token_ids diff --git a/syncode/parse_result.py b/syncode/parse_result.py index ba9f04c..e66716b 100644 --- a/syncode/parse_result.py +++ b/syncode/parse_result.py @@ -63,12 +63,6 @@ def from_accept_terminals(cur_accept_terminals, next_accept_terminals, remainder accept_sequences.add(AcceptSequence([final_terminal, t2])) if ignore_terminals is not None: - # Since ignore terminals are allowed anywhere in the code (final terminal, ignore terminal) is also a valid accept sequence - for tignore in ignore_terminals: - accept_sequences.add(AcceptSequence([final_terminal, tignore])) - - # These 3 length accept sequences are useful in under-approximating - # `grammar_strict` mode as they help improve the precision of SynCode for tignore in ignore_terminals: for t2 in next_accept_terminals: accept_sequences.add(AcceptSequence([final_terminal, tignore, t2])) diff --git a/tests/test_grammar_go.py b/tests/test_grammar_go.py index 80fb54d..e2e689f 100644 --- a/tests/test_grammar_go.py +++ b/tests/test_grammar_go.py @@ -173,7 +173,7 @@ def test_go_parser15(self): partial_code = 'package main\n\nimport (\n\t"encoding/json"\n\t"reflect"\n)\nfunc numerical_letter_grade (grades []interface{}) []string {\n\tletter_grades := make([]string, len(grades))\n\tfor i, grade := range grades {\n\t\tswitch grade.(' res = inc_parser.get_acceptable_next_terminals(partial_code) self.assertIn(AcceptSequence(['LPAR', 'TYPE']), res.accept_sequences) - self.assertIn(AcceptSequence(['LPAR', '__IGNORE_0']), res.accept_sequences) + self.assertIn(AcceptSequence(['LPAR', '__IGNORE_0', 'NAME']), res.accept_sequences) def test_go_parser16(self): inc_parser.reset()