Skip to content

Commit

Permalink
#78 - переписал SG на полностью автоматическую генерацию паттерна рег…
Browse files Browse the repository at this point in the history
…улярки
  • Loading branch information
Stepami committed Sep 20, 2024
1 parent c1bd557 commit c12be00
Show file tree
Hide file tree
Showing 8 changed files with 73 additions and 111 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
using HydraScript.Domain.FrontEnd.Lexer;

namespace HydraScript.Infrastructure.LexerRegexGenerator;

/// <summary>
/// Default <see cref="ITokenTypesJsonStringProvider"/> implementation:
/// it simply forwards to <c>TokenTypesJson.String</c>.
/// </summary>
internal class DefaultTokenTypesJsonStringProvider : ITokenTypesJsonStringProvider
{
    /// <inheritdoc />
    public string TokenTypesJsonString
    {
        get { return TokenTypesJson.String; }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,8 @@
<PackageReference Include="Microsoft.CodeAnalysis.CSharp.Workspaces" />
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\..\Domain\HydraScript.Domain.FrontEnd\HydraScript.Domain.FrontEnd.csproj" />
</ItemGroup>

</Project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
namespace HydraScript.Infrastructure.LexerRegexGenerator;

/// <summary>
/// Abstraction over the source of the token types JSON text,
/// so that the JSON can be substituted (for example, in tests).
/// </summary>
public interface ITokenTypesJsonStringProvider
{
    /// <summary>
    /// Gets the token types JSON as a single string.
    /// </summary>
    string TokenTypesJsonString { get; }
}
Original file line number Diff line number Diff line change
@@ -1,96 +1,42 @@
using System.Collections.Immutable;
using System.Text;
using System.Text.Json;
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp;
using Microsoft.CodeAnalysis.CSharp.Syntax;
using Microsoft.CodeAnalysis.Text;

namespace HydraScript.Infrastructure.LexerRegexGenerator;

// NOTE(review): this span is a rendered diff (hunk "-1,96 +1,42"). Removed and added lines
// appear interleaved without +/- markers, so the text below is not compilable as shown.
// Comments describe only what each visible line does; confirm against the actual commit.
/// <summary>
/// Incremental source generator that builds a single regex pattern from token types JSON
/// and adds it to the compilation as a generated partial class with a string constant.
/// </summary>
[Generator]
public partial class PatternGenerator : IIncrementalGenerator
{
// Source text of the generic PatternContainerAttribute marker attribute; it is added to the
// compilation under the hint name "PatternContainerAttribute.g.cs" in Initialize.
private const string AttributeSourceCode = @"// <auto-generated/>
namespace HydraScript.Infrastructure;
[System.AttributeUsage(System.AttributeTargets.Class)]
public class PatternContainerAttribute<T>(string json) : System.Attribute
where T : HydraScript.Domain.FrontEnd.Lexer.IGeneratedRegexContainer
{
public string Json { get; } = json;
}
";
/// <summary>
/// Source of the token types JSON. Defaults to <see cref="DefaultTokenTypesJsonStringProvider"/>
/// and can be replaced through the object initializer.
/// </summary>
public ITokenTypesJsonStringProvider Provider { get; init; } = new DefaultTokenTypesJsonStringProvider();

/// <summary>
/// Registers the generator's outputs on the given initialization context.
/// </summary>
/// <param name="context">Initialization context supplied by the compiler.</param>
public void Initialize(IncrementalGeneratorInitializationContext context)
{
context.RegisterPostInitializationOutput(ctx => ctx.AddSource(
"PatternContainerAttribute.g.cs",
SourceText.From(AttributeSourceCode, Encoding.UTF8)));

// Select classes annotated with PatternContainerAttribute`1, map them to RegexContainerInfo
// and drop the null results.
var provider = context.SyntaxProvider
.ForAttributeWithMetadataName(
"HydraScript.Infrastructure.PatternContainerAttribute`1",
static (s, _) => IsSyntaxTargetForGeneration(s),
static (ctx, _) => GetTypeDeclarationForSourceGen(ctx))
.Where(static x => x is not null)
.Select(static (x, _) => x!);

context.RegisterImplementationSourceOutput(provider.Collect(), GenerateCode);
}

// Accepts only class declarations that carry both the partial and the internal modifier.
private static bool IsSyntaxTargetForGeneration(SyntaxNode node) =>
node is ClassDeclarationSyntax candidate &&
candidate.Modifiers.Any(SyntaxKind.PartialKeyword) &&
candidate.Modifiers.Any(SyntaxKind.InternalKeyword);

// Pairs the target class identifier with the first constructor argument of the first
// attribute (used as JSON); returns null when the context has no attributes.
private static RegexContainerInfo? GetTypeDeclarationForSourceGen(
GeneratorAttributeSyntaxContext context)
{
var attribute = context.Attributes.FirstOrDefault();
if (attribute is null)
return null;
var visitable = (ClassDeclarationSyntax)context.TargetNode;
var json = attribute.ConstructorArguments.First().Value!.ToString()!;
return new RegexContainerInfo(
ClassName: visitable.Identifier.Text,
json);
}
// Deserialize the provider's JSON into token types, order them by ascending Priority and
// append a catch-all "ERROR" token type (pattern \S+, priority int.MaxValue) at the end.
var tokenTypes = JsonSerializer.Deserialize(
Provider.TokenTypesJsonString,
PatternGeneratorContext.Default.IEnumerableTokenType)!
.OrderBy(x => x.Priority)
.Concat([new TokenType("ERROR", @"\S+", int.MaxValue)]);
// Join the per-token named regexes with '|' into one alternation pattern.
var pattern = string.Join('|', tokenTypes.Select(t => t.GetNamedRegex()));

// Produces one generated source file per collected container info, named "<ClassName>.g.cs".
private static void GenerateCode(
SourceProductionContext context,
ImmutableArray<RegexContainerInfo> containerInfos)
{
foreach (var info in containerInfos)
{
var tokenTypes = JsonSerializer.Deserialize(
info.Json,
PatternGeneratorContext.Default.IEnumerableTokenType)!
.OrderBy(x => x.Priority)
.Concat([new TokenType("ERROR", @"\S+", int.MaxValue)]);
var pattern = string.Join('|', tokenTypes.Select(t => t.GetNamedRegex()));

// Template of the generated file: a partial class exposing the pattern as a string constant.
var code = $@"// <auto-generated/>
var code = $@"// <auto-generated/>
using System.Diagnostics.CodeAnalysis;
namespace HydraScript.Infrastructure;
internal partial class {info.ClassName}
internal partial class PatternContainer
{{
[StringSyntax(StringSyntaxAttribute.Regex)]
public const string Pattern =
public const string Value =
""""""
{pattern}
"""""";
}}
";
context.AddSource($"{info.ClassName}.g.cs", SourceText.From(code, Encoding.UTF8));
}
}

// Class name plus JSON string captured from an annotated container class.
private record RegexContainerInfo(
string ClassName,
string Json);
// Add the generated text to the compilation under the hint name "PatternContainer.g.cs".
context.RegisterPostInitializationOutput(ctx => ctx.AddSource(
"PatternContainer.g.cs",
SourceText.From(code, Encoding.UTF8)));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,8 @@

namespace HydraScript.Infrastructure;

// NOTE(review): rendered diff (hunk "-3,9 +3,8") without +/- markers. The [PatternContainer<...>]
// attribute and the [GeneratedRegex] line with the literal pattern appear to be the removed side,
// and [GeneratedRegex(PatternContainer.Value)] the added side — confirm against the commit.
/// <summary>
/// Regex container implementing <c>IGeneratedRegexContainer</c>; its regex is exposed through
/// the partial <c>GetRegex</c> method annotated with <c>[GeneratedRegex]</c>.
/// </summary>
[PatternContainer<GeneratedRegexContainer>(TokenTypesJson.String)]
internal partial class GeneratedRegexContainer : IGeneratedRegexContainer
{
[GeneratedRegex("""(?<Comment>[/]{2}.*)|(?<FloatLiteral>[0-9]+[.][0-9]+)|(?<IntegerLiteral>[0-9]+)|(?<NullLiteral>null)|(?<BooleanLiteral>true|false)|(?<StringLiteral>\"(\\.|[^"\\])*\")|(?<Keyword>let|const|function|if|else|while|break|continue|return|as|type)|(?<Operator>[+]{1,2}|[-]|[*]|[/]|[%]|([!]|[=])[=]|([<]|[>])[=]?|[!]|[|]{2}|[&]{2}|[~]|[:]{2})|(?<Ident>[a-zA-Z][a-zA-Z0-9]*)|(?<QuestionMark>[?])|(?<Colon>[:])|(?<SemiColon>[;])|(?<Assign>[=])|(?<Comma>[,])|(?<LeftCurl>[{])|(?<RightCurl>[}])|(?<LeftParen>[(])|(?<RightParen>[)])|(?<Dot>[.])|(?<LeftBracket>[[])|(?<RightBracket>[]])|(?<ERROR>\S+)""")]
// The pattern is taken from the PatternContainer.Value constant.
[GeneratedRegex(PatternContainer.Value)]
public static partial Regex GetRegex();
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
<PrivateAssets>all</PrivateAssets>
</PackageReference>
<PackageReference Include="NSubstitute" />
</ItemGroup>

<ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,62 +1,72 @@
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Reflection;
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp;
using NSubstitute;
using Xunit;

namespace HydraScript.Infrastructure.LexerRegexGenerator.Tests;

public class PatternGeneratorTests
{
[StringSyntax(StringSyntaxAttribute.Json)]
private const string JsonStringReplacement =
"""
[
{
"tag": "Test2",
"pattern": "test2",
"priority": 2
},
{
"tag": "Test1",
"pattern": "test1",
"priority": 1
}
]
""";

[Fact]
public void Initialize_PatternContainerMarked_CorrectlyGenerated()
{
var inputCompilation = CreateCompilation(
"""
using System.Text.RegularExpressions;
using HydraScript.Domain.FrontEnd.Lexer;
var provider = Substitute.For<ITokenTypesJsonStringProvider>();
provider.TokenTypesJsonString.Returns(JsonStringReplacement);
var generator = new PatternGenerator
{
Provider = provider
};
GeneratorDriver driver = CSharpGeneratorDriver.Create(generator);

namespace HydraScript.Infrastructure;
driver = driver.RunGeneratorsAndUpdateCompilation(CreateCompilation(string.Empty), out var outputCompilation,
out var diagnostics);
Debug.Assert(diagnostics.IsEmpty);
Debug.Assert(outputCompilation.SyntaxTrees.Count() == 2);

[PatternContainer<TestPatternContainer>("[{ \"tag\": \"Number\", \"pattern\": \"[0-9]+\", \"priority\": 2 }, { \"tag\": \"Word\", \"pattern\": \"[a-zA-Z]+\", \"priority\": 1 }]")]
internal partial class TestPatternContainer : IGeneratedRegexContainer
{
public static Regex GetRegex() => throw new NotImplementedException();
}
""");
var runResult = driver.GetRunResult();

var generatedFileSyntax = runResult.GeneratedTrees
.Single(t => t.FilePath.EndsWith("PatternContainer.g.cs"));

const string expectedSource =
const string expectedSource =
""""
// <auto-generated/>

using System.Diagnostics.CodeAnalysis;

namespace HydraScript.Infrastructure;

internal partial class TestPatternContainer
internal partial class PatternContainer
{
[StringSyntax(StringSyntaxAttribute.Regex)]
public const string Pattern =
public const string Value =
"""
(?<Word>[a-zA-Z]+)|(?<Number>[0-9]+)|(?<ERROR>\S+)
(?<Test1>test1)|(?<Test2>test2)|(?<ERROR>\S+)
""";
}

"""";

var generator = new PatternGenerator();
GeneratorDriver driver = CSharpGeneratorDriver.Create(generator);

driver = driver.RunGeneratorsAndUpdateCompilation(inputCompilation, out var outputCompilation,
out var diagnostics);
Debug.Assert(diagnostics.IsEmpty);
Debug.Assert(outputCompilation.SyntaxTrees.Count() == 3);

var runResult = driver.GetRunResult();

var generatedFileSyntax = runResult.GeneratedTrees
.Single(t => t.FilePath.EndsWith("TestPatternContainer.g.cs"));

Assert.Equal(
expectedSource,
generatedFileSyntax.GetText().ToString(),
Expand Down

This file was deleted.

0 comments on commit c12be00

Please sign in to comment.