Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhance and cleanup lexer-parser interface #107

Merged
merged 2 commits into from
Jan 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 56 additions & 54 deletions src/lexer.c
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,6 @@ int skip_newline = 1;

int preproc_match;

/* Allow replacing identifiers with alias value if alias exists. This is
* disabled in certain cases, e.g. #undef.
*/
int preproc_aliasing = 1;

/* Point to the first character after where the macro has been called. It is
* needed when returning from the macro body.
*/
Expand Down Expand Up @@ -174,44 +169,15 @@ char read_char(int is_skip_space)
return next_char;
}

/* Extract the alias name from a `defined(...)` expression, i.e. copy
 * "__arm__" out of "defined(__arm__)".
 *
 * alias_name: destination buffer; receives the NUL-terminated name. The
 *             caller must provide room for the name plus its terminator.
 * src:        text expected to begin with the 8-character prefix "defined(".
 *
 * Copying stops at the closing ')' or at the end of `src`, whichever comes
 * first, so malformed input without ')' cannot run past the terminator. If
 * `src` is shorter than the prefix, alias_name becomes the empty string.
 */
void read_alias_name_from_defined(char *alias_name, char *src)
{
    int i;

    /* Skip "defined(" (8 characters) without stepping past the end of src. */
    i = 0;
    while (i < 8 && src[i])
        i++;
    src = src + i;

    i = 0;
    while (src[i] && src[i] != ')') {
        alias_name[i] = src[i];
        i++;
    }
    alias_name[i] = 0;
}

/* Return the character located `offset` positions away from the current
 * read index in SOURCE. The read index itself is not modified.
 */
char peek_char(int offset)
{
    int pos;

    pos = source_idx + offset;
    return SOURCE[pos];
}

/* Check whether an alias exists and, if so, set `preproc_match` to 1.
 * `preproc_match` is left untouched when no alias is found.
 *
 * defined: when non-zero, `token_str` is treated as a `defined(NAME)`
 *          expression and NAME is looked up; otherwise `token_str` itself
 *          is used as the alias name.
 */
void chk_def(int defined)
{
    char name_buf[MAX_TOKEN_LEN];
    char *lookup_name = token_str;

    if (defined) {
        read_alias_name_from_defined(name_buf, token_str);
        lookup_name = name_buf;
    }

    if (find_alias(lookup_name))
        preproc_match = 1;
}

token_t get_next_token()
/* Lexes the next token and returns its token type. Parameter `aliasing`
* controls preprocessor aliasing on identifier tokens; pass 0 to disable it.
*/
token_t lex_token_internal(int aliasing)
{
token_str[0] = 0;

Expand Down Expand Up @@ -257,7 +223,7 @@ token_t get_next_token()
read_char(0);
if (next_char == '/') {
read_char(1);
return get_next_token();
return lex_token_internal(aliasing);
}
}
} while (next_char);
Expand Down Expand Up @@ -549,7 +515,7 @@ token_t get_next_token()
if (!strcmp(token_str, "continue"))
return T_continue;

if (preproc_aliasing) {
if (aliasing) {
alias = find_alias(token_str);
if (alias) {
token_t t = is_numeric(alias) ? T_numeric : T_string;
Expand All @@ -570,7 +536,7 @@ token_t get_next_token()
next_char = SOURCE[source_idx];
} else
next_char = read_char(1);
return get_next_token();
return lex_token_internal(aliasing);
}

if (next_char == 0)
Expand All @@ -582,30 +548,45 @@ token_t get_next_token()
return T_eof;
}

/* Lexes the next token with preprocessor aliasing enabled and returns its
 * token type. Call `lex_token_internal` directly when aliasing must be
 * turned off for the next token.
 */
token_t lex_token()
{
    int aliasing = 1;

    return lex_token_internal(aliasing);
}

/* Skip the content. We only need the index where the macro body begins. */
void skip_macro_body()
{
while (!is_newline(next_char))
next_token = get_next_token();
next_token = lex_token();

skip_newline = 1;
next_token = get_next_token();
next_token = lex_token();
}

int lex_accept(token_t token)
/* Accepts next token if token types are matched. */
int lex_accept_internal(token_t token, int aliasing)
{
if (next_token == token) {
/* FIXME: this is a hack, fix aggressive aliasing first */
if (token == T_cppd_ifdef)
preproc_aliasing = 0;
next_token = get_next_token();
if (token == T_cppd_ifdef)
preproc_aliasing = 1;
next_token = lex_token_internal(aliasing);
return 1;
}

return 0;
}

/* Accepts the next token if its type matches `token`, lexing the following
 * token with aliasing enabled. Returns whatever `lex_accept_internal`
 * returns. Call `lex_accept_internal` directly to disable aliasing on the
 * following token.
 */
int lex_accept(token_t token)
{
    int aliasing = 1;

    return lex_accept_internal(token, aliasing);
}

/* Peeks at the next token and copies the token's literal to value if the
* token types match.
*/
int lex_peek(token_t token, char *value)
{
if (next_token == token) {
Expand All @@ -617,17 +598,38 @@ int lex_peek(token_t token, char *value)
return 0;
}

void lex_ident(token_t token, char *value)
/* Strictly match next token with given token type and copy token's
* literal to value.
*/
void lex_ident_internal(token_t token, char *value, int aliasing)
{
if (next_token != token)
error("Unexpected token");
strcpy(value, token_str);
next_token = get_next_token();
next_token = lex_token_internal(aliasing);
}

void lex_expect(token_t token)
/* Strictly matches the next token against `token` and copies the token's
 * literal into `value`, with aliasing enabled for the token lexed afterwards.
 * Call `lex_ident_internal` directly to disable aliasing on that token.
 */
void lex_ident(token_t token, char *value)
{
    int aliasing = 1;

    lex_ident_internal(token, value, aliasing);
}

/* Strictly match next token with given token type. */
void lex_expect_internal(token_t token, int aliasing)
{
if (next_token != token)
error("Unexpected token");
next_token = get_next_token();
next_token = lex_token_internal(aliasing);
}

/* Strictly matches the next token against `token`, with aliasing enabled for
 * the token lexed afterwards. Call `lex_expect_internal` directly to disable
 * aliasing on that token.
 */
void lex_expect(token_t token)
{
    int aliasing = 1;

    lex_expect_internal(token, aliasing);
}
67 changes: 24 additions & 43 deletions src/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ int get_size(var_t *var, type_t *type)
* whitespace */
void skip_line(int invalidate)
{
/* FIXME: Comments will cause the current validation to fail. */
skip_whitespace();
do {
if (invalidate && !is_whitespace(peek_char(0)) &&
Expand All @@ -75,28 +76,14 @@ void skip_line(int invalidate)
} while (read_char(0) != '\n');
}

/* Skip source lines one at a time until the read position reaches a line
 * that starts with '#' and whose character at peek offset 1 is not 'd'.
 * Each skipped line's characters are stored into `token_str` as it is
 * consumed.
 * NOTE(review): the 'd' test presumably keeps `#define` lines from ending
 * the skip - confirm against the callers.
 */
void if_elif_skip_lines()
{
    char lookahead;
    int len;

    for (;;) {
        skip_whitespace();

        /* Record the current line, up to but excluding '\n'. */
        len = 0;
        do {
            token_str[len] = next_char;
            len++;
        } while (read_char(0) != '\n');
        token_str[len] = 0;

        read_char(1);
        lookahead = peek_char(1);

        /* Stop on a '#' line unless the peeked character is 'd'. */
        if (next_char == '#' && lookahead != 'd')
            break;
    }
    skip_whitespace();
}

void ifdef_else_skip_lines()
/* Skips lines where the preprocessor match is false. Skipping stops once the
* next token is `T_cppd_elif`, `T_cppd_else` or `T_cppd_endif`.
*/
void cppd_control_flow_skip_lines()
{
while (!lex_peek(T_cppd_else, NULL) && !lex_peek(T_cppd_endif, NULL)) {
next_token = get_next_token();
while (!lex_peek(T_cppd_elif, NULL) && !lex_peek(T_cppd_else, NULL) &&
!lex_peek(T_cppd_endif, NULL)) {
next_token = lex_token();
}
skip_whitespace();
}
Expand All @@ -111,12 +98,10 @@ void read_defined_macro()
{
char lookup_alias[MAX_TOKEN_LEN];

preproc_aliasing = 0; /* to prevent aggressive aliasing */
lex_expect(T_identifier); /* defined */
lex_expect(T_open_bracket);
lex_expect_internal(T_open_bracket, 0);
lex_ident(T_identifier, lookup_alias);
lex_expect(T_close_bracket);
preproc_aliasing = 1;

check_def(lookup_alias);
}
Expand Down Expand Up @@ -169,10 +154,8 @@ int read_preproc_directive()
if (lex_peek(T_cppd_undef, token)) {
char alias[MAX_VAR_LEN];

preproc_aliasing = 0;
lex_expect(T_cppd_undef);
lex_expect_internal(T_cppd_undef, 0);
lex_peek(T_identifier, alias);
preproc_aliasing = 1;
lex_expect(T_identifier);

remove_alias(alias);
Expand Down Expand Up @@ -201,7 +184,7 @@ int read_preproc_directive()
return 1;
}

if_elif_skip_lines();
cppd_control_flow_skip_lines();
} else {
/* TODO: parse and evaluate constant expression here */
}
Expand All @@ -210,7 +193,7 @@ int read_preproc_directive()
if (lex_accept(T_cppd_elif)) {
if (preproc_match) {
while (!lex_peek(T_cppd_endif, NULL)) {
next_token = get_next_token();
next_token = lex_token();
}
return 1;
}
Expand All @@ -223,7 +206,7 @@ int read_preproc_directive()
return 1;
}

if_elif_skip_lines();
cppd_control_flow_skip_lines();
} else {
/* TODO: parse and evaluate constant expression here */
}
Expand All @@ -240,16 +223,15 @@ int read_preproc_directive()
return 1;
}

/* skip lines until #else or #endif */
ifdef_else_skip_lines();
cppd_control_flow_skip_lines();
return 1;
}
if (lex_accept(T_cppd_endif)) {
preproc_match = 0;
skip_whitespace();
return 1;
}
if (lex_accept(T_cppd_ifdef)) {
if (lex_accept_internal(T_cppd_ifdef, 0)) {
preproc_match = 0;
lex_ident(T_identifier, token);
check_def(token);
Expand All @@ -259,8 +241,7 @@ int read_preproc_directive()
return 1;
}

/* skip lines until #else or #endif */
ifdef_else_skip_lines();
cppd_control_flow_skip_lines();
return 1;
}

Expand Down Expand Up @@ -677,12 +658,12 @@ void read_expr_operand(block_t *parent, basic_block_t **bb)
for (i = 0; i < remainder; i++) {
source_idx = macro->params[macro->num_params - remainder + i];
next_char = SOURCE[source_idx];
next_token = get_next_token();
next_token = lex_token();
read_expr(parent, bb);
}
source_idx = t;
next_char = SOURCE[source_idx];
next_token = get_next_token();
next_token = lex_token();
} else if (mac) {
if (parent->macro)
error("Nested macro is not yet supported");
Expand All @@ -695,7 +676,7 @@ void read_expr_operand(block_t *parent, basic_block_t **bb)
while (!lex_peek(T_close_bracket, NULL)) {
mac->params[mac->num_params++] = source_idx;
do {
next_token = get_next_token();
next_token = lex_token();
} while (next_token != T_comma &&
next_token != T_close_bracket);
}
Expand All @@ -717,11 +698,11 @@ void read_expr_operand(block_t *parent, basic_block_t **bb)
int t = source_idx;
source_idx = macro_param_idx;
next_char = SOURCE[source_idx];
next_token = get_next_token();
next_token = lex_token();
read_expr(parent, bb);
source_idx = t;
next_char = SOURCE[source_idx];
next_token = get_next_token();
next_token = lex_token();
} else if (con) {
ph1_ir = add_ph1_ir(OP_load_constant);
vd = require_var(parent);
Expand Down Expand Up @@ -1599,15 +1580,15 @@ void eval_ternary_imm(int cond, char *token)
{
if (cond == 0) {
while (next_token != T_colon) {
next_token = get_next_token();
next_token = lex_token();
}
lex_accept(T_colon);
read_global_assignment(token);
} else {
read_global_assignment(token);
lex_expect(T_colon);
while (!lex_peek(T_semicolon, NULL)) {
next_token = get_next_token();
next_token = lex_token();
}
}
}
Expand Down Expand Up @@ -2453,7 +2434,7 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
while (!lex_peek(T_close_bracket, NULL)) {
mac->params[mac->num_params++] = source_idx;
do {
next_token = get_next_token();
next_token = lex_token();
} while (next_token != T_comma && next_token != T_close_bracket);
}
/* move `source_idx` to the macro body */
Expand Down