Skip to content

Commit

Permalink
(#72, #115) Parser: introduce the "lexer hack" to deal with syntax am…
Browse files Browse the repository at this point in the history
…biguity
  • Loading branch information
ForNeVeR committed Mar 27, 2022
1 parent c38f690 commit 718fd0d
Show file tree
Hide file tree
Showing 9 changed files with 150 additions and 32 deletions.
8 changes: 8 additions & 0 deletions Cesium.Ast/Statements.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
using System.Collections.Immutable;
using Yoakke.SynKit.C.Syntax;
using Yoakke.SynKit.Lexer;

namespace Cesium.Ast;

Expand All @@ -13,6 +15,12 @@ public interface IBlockItem {}
// 6.8.3 Expression and null statements
public record ExpressionStatement(Expression? Expression) : Statement;

/// <summary>
/// A block item of the form <code>item1(item2);</code> which may be either a function call or a variable definition,
/// depending on the context. The parser cannot tell the two apart, so it records both identifiers as plain text.
/// </summary>
/// <param name="Item1">The identifier written before the parentheses: a function name or a type name.</param>
/// <param name="Item2">The identifier written inside the parentheses: an argument name or a variable name.</param>
public record AmbiguousBlockItem(string Item1, string Item2) : IBlockItem;

// 6.8.6 Jump statements
public record GoToStatement(string Identifier) : Statement;
public record ReturnStatement(Expression Expression) : Statement;
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
System.Int32 <Module>::abs(System.Int32 x)
IL_0000: ldarg x
IL_0004: ret

System.Void <Module>::exit(System.Int32 x)
IL_0000: ret

System.Int32 <Module>::main()
Locals:
System.Int32 V_0
IL_0000: ldc.i4 42
IL_0005: neg
IL_0006: call System.Int32 <Module>::abs(System.Int32)
IL_000b: stloc V_0
IL_000f: ldloc V_0
IL_0013: call System.Void <Module>::exit(System.Int32)
11 changes: 11 additions & 0 deletions Cesium.CodeGen.Tests/CodeGenMethodTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -131,4 +131,15 @@ public Task PrimitiveTypes() => DoTest(@"int main(void)
return 0;
}");

// Code generation test for a program that contains the statement `exit(exitCode);`. A statement of this
// `name(name);` shape is syntactically ambiguous in C: it can be read as a call or as a declaration.
// NOTE(review): presumably the expected output is a call to `exit` — confirm against the verified IL snapshot.
[Fact]
public Task AmbiguousCallTest() => DoTest(@"
int abs(int x) { return x; }
void exit(int x) { }
int main()
{
int exitCode = abs(-42);
exit(exitCode);
}");
}
3 changes: 2 additions & 1 deletion Cesium.CodeGen.Tests/CodeGenPrimitiveTypeTests.cs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
using Cesium.Ast;
using Cesium.CodeGen.Ir.Declarations;
using Cesium.CodeGen.Ir.Types;
using Cesium.Parser;
Expand All @@ -17,7 +18,7 @@ internal void Test(string typeString, PrimitiveTypeKind expectedKind)
var source = $"{typeString} x;";
var parser = new CParser(new CLexer(source));
var ast = parser.ParseDeclaration().Ok.Value;
var declarationInfo = (ScopedIdentifierDeclaration)IScopedDeclarationInfo.Of(ast);
var declarationInfo = (ScopedIdentifierDeclaration)IScopedDeclarationInfo.Of((Declaration)ast);
var item = declarationInfo.Items.Single();
var type = (PrimitiveType)item.Declaration.Type;
Assert.Equal(expectedKind, type.Kind);
Expand Down
1 change: 1 addition & 0 deletions Cesium.CodeGen/Extensions/BlockItemEx.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ internal static class BlockItemEx
Ast.CompoundStatement s => s.ToIntermediate(),
Ast.ReturnStatement s => new ReturnStatement(s),
Ast.ExpressionStatement s => new ExpressionStatement(s),
Ast.AmbiguousBlockItem a => new AmbiguousBlockItem(a),
_ => throw new NotImplementedException($"Statement not supported, yet: {blockItem}.")
};
}
73 changes: 73 additions & 0 deletions Cesium.CodeGen/Ir/BlockItems/AmbiguousBlockItem.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
using System.Collections.Immutable;
using Cesium.Ast;
using Cesium.CodeGen.Contexts;
using Yoakke.SynKit.C.Syntax;
using Yoakke.SynKit.Lexer;
using Yoakke.SynKit.Text;
using Range = Yoakke.SynKit.Text.Range;

namespace Cesium.CodeGen.Ir.BlockItems;

/// <summary>
/// Intermediate representation of a block item shaped like <code>item1(item2);</code> which the parser could not
/// classify on its own.
///
/// Such an item is either a declaration (item1 is a type, item2 is a variable name) or a function call (item1 is a
/// function name, item2 is an argument name). The decision is made when the item is emitted, by looking item1 up
/// among the known types and the known functions.
/// </summary>
internal class AmbiguousBlockItem : IBlockItem
{
    private readonly string _item1;
    private readonly string _item2;

    /// <summary>Captures both identifiers of the ambiguous AST node.</summary>
    public AmbiguousBlockItem(Ast.AmbiguousBlockItem item)
    {
        _item1 = item.Item1;
        _item2 = item.Item2;
    }

    /// <summary>No lowering is performed: the item is returned unchanged.</summary>
    public IBlockItem Lower()
    {
        return this;
    }

    /// <summary>
    /// Resolves the ambiguity and emits the item as whichever construct it turned out to be.
    /// </summary>
    /// <exception cref="NotSupportedException">
    /// item1 names neither a type nor a function, or it names both.
    /// </exception>
    public void EmitTo(FunctionScope scope)
    {
        // A declaration reading requires item1 to be a known type.
        var namesType = scope.Context.GetTypeReference(_item1) != null;

        // A call reading requires item1 to be a known function.
        var namesFunction = scope.Functions.GetValueOrDefault(_item1) != null;

        switch (namesType, namesFunction)
        {
            case (true, false):
                EmitVariableDeclaration(scope);
                break;

            case (false, true):
                EmitFunctionCall(scope);
                break;

            case (false, false):
                throw new NotSupportedException(
                    $"{_item1}({_item2}) is supposed to be either a variable declaration or a function call," +
                    " but wasn't resolved to be either.");

            case (true, true):
                throw new NotSupportedException(
                    $"{_item1}({_item2}) is supposed to be either a variable declaration or a function call," +
                    $" but it's ambiguous which it is, since both a function and a type of name {_item1} exist.");
        }
    }

    /// <summary>Placeholder for the declaration reading, which always throws for now.</summary>
    private void EmitVariableDeclaration(FunctionScope scope) =>
        throw new NotImplementedException("Ambiguous variable declarations aren't supported, yet.");

    /// <summary>
    /// Emits the call reading by building the AST of <code>item1(item2)</code> by hand and passing it through the
    /// regular expression statement.
    /// </summary>
    private void EmitFunctionCall(FunctionScope scope)
    {
        // The original tokens are not kept by the ambiguous node, so identifier tokens with empty ranges are
        // synthesized from the stored text.
        static CToken MakeIdentifierToken(string text) =>
            new CToken(new Range(), text, new Range(), text, CTokenType.Identifier);

        var calleeToken = MakeIdentifierToken(_item1);
        var parameterToken = MakeIdentifierToken(_item2);

        var callee = new ConstantExpression(calleeToken);
        var arguments = ImmutableArray.Create<Expression>(new ConstantExpression(parameterToken));
        var callAst = new FunctionCallExpression(callee, arguments);

        var callExpression = new Expressions.FunctionCallExpression(callAst);
        new ExpressionStatement(callExpression).Lower().EmitTo(scope);
    }
}
2 changes: 1 addition & 1 deletion Cesium.CodeGen/Ir/BlockItems/ExpressionStatement.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ namespace Cesium.CodeGen.Ir.BlockItems;
internal class ExpressionStatement : IBlockItem
{
private readonly IExpression? _expression;
private ExpressionStatement(IExpression? expression)
internal ExpressionStatement(IExpression? expression)
{
_expression = expression;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,26 +75,9 @@
]
},
{
"$type": "Cesium.Ast.ExpressionStatement, Cesium.Ast",
"Expression": {
"$type": "Cesium.Ast.FunctionCallExpression, Cesium.Ast",
"Function": {
"$type": "Cesium.Ast.ConstantExpression, Cesium.Ast",
"Constant": {
"Kind": "Identifier",
"Text": "exit"
}
},
"Arguments": [
{
"$type": "Cesium.Ast.ConstantExpression, Cesium.Ast",
"Constant": {
"Kind": "Identifier",
"Text": "exitCode"
}
}
]
}
"$type": "Cesium.Ast.AmbiguousBlockItem, Cesium.Ast",
"PossibleFunction": "exit",
"PossibleArgument": "exitCode"
}
]
}
Expand Down
45 changes: 35 additions & 10 deletions Cesium.Parser/CParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ private static Expression MakeAssignmentExpression(
//
// declaration: declaration_specifiers init_declarator_list? ';'
[CustomParser("declaration")]
private ParseResult<Declaration> CustomParseDeclaration(int offset)
private ParseResult<IBlockItem> CustomParseDeclaration(int offset)
{
var declarationSpecifiersResult = CustomParseOneOrMore(parseDeclarationSpecifier, offset);
if (declarationSpecifiersResult.IsError) return declarationSpecifiersResult.Error;
Expand Down Expand Up @@ -214,10 +214,31 @@ private ParseResult<Declaration> CustomParseDeclaration(int offset)
return ParseResult.Error(";", t, t!.Range.Start, ";");
}

private static Declaration MakeDeclaration(
private static IBlockItem MakeDeclaration(
DeclarationSpecifiers specifiers,
InitDeclaratorList? initDeclarators,
IToken _) => new(specifiers, initDeclarators);
IToken _)
{
// NOTE: this is the "lexer hack" to deal with syntax ambiguity. The same syntax may be either a function call
// or a variable declaration, depending on the context, and we don't have this information in the parser yet.
if (specifiers.Length == 1 && specifiers.Single() is NamedTypeSpecifier specifier
&& initDeclarators?.Length == 1)
{
var ((pointer, directDeclarator), initializer) = initDeclarators.Value.Single();
if (pointer == null && initializer == null && directDeclarator is DeclaratorDirectDeclarator ddd)
{
ddd.Deconstruct(out var nestedDeclarator);
var (nestedPointer, nestedDirectDeclarator) = nestedDeclarator;
if (nestedPointer == null && nestedDirectDeclarator is IdentifierDirectDeclarator idd)
{
idd.Deconstruct(out var identifier);
return new AmbiguousBlockItem(specifier.TypeDefName, identifier);
}
}
}

return new Declaration(specifiers, initDeclarators);
}

// TODO[#107]: This is a synthetic set of rules which is absent from the C standard, but required to simplify the
// implementation. Get rid of this, eventually.
Expand Down Expand Up @@ -591,7 +612,7 @@ private static AssignmentInitializer MakeInitializer(Expression assignmentExpres
// TODO: [Rule("statement: selection_statement")]
// TODO: [Rule("statement: iteration_statement")]
[Rule("statement: jump_statement")]
private static Statement MakeStatementIdentity(Statement statement) => statement;
private static IBlockItem MakeStatementIdentity(IBlockItem statement) => statement;

// TODO: 6.8.1 Labeled statements
// 6.8.2 Compound statement
Expand All @@ -611,7 +632,7 @@ private static CompoundStatement MakeCompoundStatement(ICToken _, BlockItemList?

// 6.8.3 Expression and null statements
[Rule("expression_statement: expression? ';'")]
private static ExpressionStatement MakeExpressionStatement(Expression expression, IToken _) => new(expression);
private static ExpressionStatement MakeExpressionStatement(Expression? expression, IToken _) => new(expression);

// TODO: 6.8.4 Selection statements
// TODO: 6.8.5 Iteration statements
Expand Down Expand Up @@ -640,8 +661,9 @@ private static TranslationUnit MakeTranslationUnit(TranslationUnit init, Externa
private static ExternalDeclaration MakeExternalDeclaration(FunctionDefinition function) => function;

[Rule("external_declaration: declaration")]
private static ExternalDeclaration MakeExternalDeclaration(Declaration declaration) =>
new SymbolDeclaration(declaration);
private static ExternalDeclaration MakeExternalDeclaration(IBlockItem declaration) =>
// TODO[#115]: This direct cast shouldn't be necessary. It is here because of the "lexer hack".
new SymbolDeclaration((Declaration)declaration);

// 6.9.1 Function definitions

Expand Down Expand Up @@ -680,13 +702,16 @@ private static FunctionDefinition MakeFunctionDefinition(
CompoundStatement statement) => new(specifiers, declarator, declarationList, statement);

[Rule("declaration_list: declaration")]
private static ImmutableArray<Declaration> MakeDeclarationList(Declaration declaration) =>
ImmutableArray.Create(declaration);
private static ImmutableArray<Declaration> MakeDeclarationList(IBlockItem declaration) =>
// TODO[#115]: This direct cast shouldn't be necessary. It is here because of the "lexer hack".
ImmutableArray.Create((Declaration)declaration);

[Rule("declaration_list: declaration_list declaration")]
private static ImmutableArray<Declaration> MakeDeclarationList(
ImmutableArray<Declaration> declarations,
Declaration newDeclaration) => declarations.Add(newDeclaration);
IBlockItem newDeclaration) =>
// TODO[#115]: This direct cast shouldn't be necessary. It is here because of the "lexer hack".
declarations.Add((Declaration)newDeclaration);

// TODO: 6.9.2 External object definitions

Expand Down

0 comments on commit 718fd0d

Please sign in to comment.