Skip to content

Commit

Permalink
Version 1.3 imported from SourceForge.
Browse files Browse the repository at this point in the history
  • Loading branch information
rpinchbeck committed Sep 17, 2017
1 parent 5872020 commit 06143f1
Show file tree
Hide file tree
Showing 20 changed files with 481 additions and 259 deletions.
3 changes: 3 additions & 0 deletions AbnfToAntlr.Common/AbnfToAntlr.Common.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,16 @@
<Compile Include="AbnfAstLexer.cs" />
<Compile Include="AbnfAstParser.cs" />
<Compile Include="AbnfToAntlrTranslator.cs" />
<Compile Include="INamedCharacterLookup.cs" />
<Compile Include="NamedCharacter.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="NamedCharacterLookupSimple.cs" />
<Compile Include="TreeVisitor.cs" />
<Compile Include="TreeVisitor_GatherDistinctCharacters.cs" />
<Compile Include="TreeVisitor_OutputTranslation.cs" />
<Compile Include="TreeVisitor_OutputTranslation_Direct.cs" />
<Compile Include="TreeVisitor_OutputTranslation_Indirect.cs" />
<Compile Include="NamedCharacterLookupUnicode.cs" />
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
Expand Down
49 changes: 29 additions & 20 deletions AbnfToAntlr.Common/AbnfToAntlrTranslator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -132,16 +132,20 @@ public void Translate(TextReader input, TextWriter writer, bool performDirectTra
// get parse tree
var tree = results.Tree;

// gather set of distinct literals
// give lexer rules unicode standard names
INamedCharacterLookup lookup = new NamedCharacterLookupUnicode();

// override with simple lexer rule names (comment out the next line to keep the unicode standard names)
lookup = new NamedCharacterLookupSimple();

// output translated grammar
if (performDirectTranslation)
{
OutputDirectTranslation(writer, tokens, tree);
OutputDirectTranslation(writer, tokens, tree, lookup);
}
else
{
OutputIndirectTranslation(writer, tokens, tree);
OutputIndirectTranslation(writer, tokens, tree, lookup);
}
}
}
Expand All @@ -163,46 +167,51 @@ public string Translate(string abnfGrammar, bool performDirectTranslation = fals
}


void OutputDirectTranslation(TextWriter writer, CommonTokenStream tokens, CommonTree tree)
void OutputDirectTranslation(TextWriter writer, CommonTokenStream tokens, CommonTree tree, INamedCharacterLookup lookup)
{
// output ANTLR translation
var outputVisitor = new TreeVisitor_OutputTranslation_Direct(tokens, writer);
var outputVisitor = new TreeVisitor_OutputTranslation_Direct(tokens, writer, lookup);
outputVisitor.Visit(tree);
}

void OutputIndirectTranslation(TextWriter writer, CommonTokenStream tokens, CommonTree tree)
void OutputIndirectTranslation(TextWriter writer, CommonTokenStream tokens, CommonTree tree, INamedCharacterLookup lookup)
{
// gather distinct literals
var distinctCharacters = new Dictionary<char, NamedCharacter>();
var literalVisitor = new TreeVisitor_GatherDistinctCharacters(distinctCharacters);
var literalVisitor = new TreeVisitor_GatherDistinctCharacters(distinctCharacters, lookup);
literalVisitor.Visit(tree);

// output ANTLR translation (substitute rules for character literals)
var outputVisitor = new TreeVisitor_OutputTranslation_Indirect(tokens, writer, distinctCharacters);
var outputVisitor = new TreeVisitor_OutputTranslation_Indirect(tokens, writer, distinctCharacters, lookup);
outputVisitor.Visit(tree);

// append literal rules to output
OutputLiteralRules(distinctCharacters, writer);
OutputLiteralRules(distinctCharacters, writer, lookup);
}

void OutputLiteralRules(IDictionary<char, NamedCharacter> literals, TextWriter writer)
void OutputLiteralRules(IDictionary<char, NamedCharacter> literals, TextWriter writer, INamedCharacterLookup lookup)
{
var knownValues = literals.Values
.Where(x => NamedCharacter.IsKnownCharacter(x.Character))
var knownValues =
literals.Values
.Where(x => lookup.IsKnownCharacter(x.Character))
.OrderBy(x => x.Character)
.Select(x => x);

var unknownValues = literals.Values
.Where(x => !(NamedCharacter.IsKnownCharacter(x.Character)))
var unknownValues =
literals.Values
.Where(x => !(lookup.IsKnownCharacter(x.Character)))
.OrderBy(x => x.Character)
.Select(x => x);

writer.WriteLine("");
writer.WriteLine(@"//////////////////////////////////////////////////////////////////////////");
writer.WriteLine(@"// Lexer rules generated for each distinct character in original grammar");
writer.WriteLine(@"// per http://www.unicode.org/charts/PDF/U0000.pdf");
writer.WriteLine(@"//////////////////////////////////////////////////////////////////////////");
writer.WriteLine("");
if (literals.Count > 0)
{
writer.WriteLine("");
writer.WriteLine(@"//////////////////////////////////////////////////////////////////////////");
writer.WriteLine(@"// Lexer rules generated for each distinct character in original grammar");
writer.WriteLine(@"// per http://www.unicode.org/charts/PDF/U0000.pdf");
writer.WriteLine(@"//////////////////////////////////////////////////////////////////////////");
writer.WriteLine("");
}

// output known (named) literals first
foreach (var value in knownValues)
Expand Down
13 changes: 13 additions & 0 deletions AbnfToAntlr.Common/INamedCharacterLookup.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace AbnfToAntlr.Common
{
/// <summary>
/// Strategy for naming the characters found in a grammar. The translator
/// hands one implementation to its tree visitors and also uses it to split
/// the generated lexer rules into known and unknown characters.
/// </summary>
public interface INamedCharacterLookup
{
/// <summary>
/// Reports whether this lookup treats <paramref name="character"/> as known.
/// The translator emits rules for known characters before the others.
/// NOTE(review): presumably "known" means the implementation has a
/// predefined name for the character; confirm against the implementations.
/// </summary>
/// <param name="character">The character to test.</param>
/// <returns>True when the character is known to this lookup.</returns>
bool IsKnownCharacter(char character);
/// <summary>
/// Returns the <see cref="NamedCharacter"/> (name plus character) for
/// <paramref name="character"/>.
/// NOTE(review): the result for characters that are not known is up to the
/// implementation and is not visible here; confirm before relying on it.
/// </summary>
/// <param name="character">The character to look up.</param>
/// <returns>The named character entry.</returns>
NamedCharacter GetNamedCharacter(char character);
}
}
144 changes: 2 additions & 142 deletions AbnfToAntlr.Common/NamedCharacter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,145 +30,5 @@ public class NamedCharacter
{
public string Name;
public char Character;

// Official character names from Unicode.org
// http://www.unicode.org/charts/PDF/U0000.pdf

/// <summary>
/// Table of characters that have a predefined rule name, keyed by the
/// character itself. Every entry's <c>Character</c> equals its key, and
/// every letter's name ends with that same letter.
/// </summary>
public static readonly Dictionary<char, NamedCharacter> KnownCharacters =
    new Dictionary<char, NamedCharacter>
    {
        // upper case letters
        { 'A', new NamedCharacter { Name = "CAPITAL_LETTER_A", Character = 'A' } },
        { 'B', new NamedCharacter { Name = "CAPITAL_LETTER_B", Character = 'B' } },
        { 'C', new NamedCharacter { Name = "CAPITAL_LETTER_C", Character = 'C' } },
        { 'D', new NamedCharacter { Name = "CAPITAL_LETTER_D", Character = 'D' } },
        { 'E', new NamedCharacter { Name = "CAPITAL_LETTER_E", Character = 'E' } },
        { 'F', new NamedCharacter { Name = "CAPITAL_LETTER_F", Character = 'F' } },
        { 'G', new NamedCharacter { Name = "CAPITAL_LETTER_G", Character = 'G' } },
        { 'H', new NamedCharacter { Name = "CAPITAL_LETTER_H", Character = 'H' } },
        { 'I', new NamedCharacter { Name = "CAPITAL_LETTER_I", Character = 'I' } },
        { 'J', new NamedCharacter { Name = "CAPITAL_LETTER_J", Character = 'J' } },
        { 'K', new NamedCharacter { Name = "CAPITAL_LETTER_K", Character = 'K' } },
        { 'L', new NamedCharacter { Name = "CAPITAL_LETTER_L", Character = 'L' } },
        { 'M', new NamedCharacter { Name = "CAPITAL_LETTER_M", Character = 'M' } },
        { 'N', new NamedCharacter { Name = "CAPITAL_LETTER_N", Character = 'N' } },
        { 'O', new NamedCharacter { Name = "CAPITAL_LETTER_O", Character = 'O' } },
        { 'P', new NamedCharacter { Name = "CAPITAL_LETTER_P", Character = 'P' } },
        { 'Q', new NamedCharacter { Name = "CAPITAL_LETTER_Q", Character = 'Q' } },
        { 'R', new NamedCharacter { Name = "CAPITAL_LETTER_R", Character = 'R' } },
        { 'S', new NamedCharacter { Name = "CAPITAL_LETTER_S", Character = 'S' } },
        { 'T', new NamedCharacter { Name = "CAPITAL_LETTER_T", Character = 'T' } },
        { 'U', new NamedCharacter { Name = "CAPITAL_LETTER_U", Character = 'U' } },
        { 'V', new NamedCharacter { Name = "CAPITAL_LETTER_V", Character = 'V' } },
        { 'W', new NamedCharacter { Name = "CAPITAL_LETTER_W", Character = 'W' } },
        { 'X', new NamedCharacter { Name = "CAPITAL_LETTER_X", Character = 'X' } },
        { 'Y', new NamedCharacter { Name = "CAPITAL_LETTER_Y", Character = 'Y' } },
        { 'Z', new NamedCharacter { Name = "CAPITAL_LETTER_Z", Character = 'Z' } },

        // lower case letters ('i' and 'j' must map to their own names, not each other's)
        { 'a', new NamedCharacter { Name = "SMALL_LETTER_A", Character = 'a' } },
        { 'b', new NamedCharacter { Name = "SMALL_LETTER_B", Character = 'b' } },
        { 'c', new NamedCharacter { Name = "SMALL_LETTER_C", Character = 'c' } },
        { 'd', new NamedCharacter { Name = "SMALL_LETTER_D", Character = 'd' } },
        { 'e', new NamedCharacter { Name = "SMALL_LETTER_E", Character = 'e' } },
        { 'f', new NamedCharacter { Name = "SMALL_LETTER_F", Character = 'f' } },
        { 'g', new NamedCharacter { Name = "SMALL_LETTER_G", Character = 'g' } },
        { 'h', new NamedCharacter { Name = "SMALL_LETTER_H", Character = 'h' } },
        { 'i', new NamedCharacter { Name = "SMALL_LETTER_I", Character = 'i' } },
        { 'j', new NamedCharacter { Name = "SMALL_LETTER_J", Character = 'j' } },
        { 'k', new NamedCharacter { Name = "SMALL_LETTER_K", Character = 'k' } },
        { 'l', new NamedCharacter { Name = "SMALL_LETTER_L", Character = 'l' } },
        { 'm', new NamedCharacter { Name = "SMALL_LETTER_M", Character = 'm' } },
        { 'n', new NamedCharacter { Name = "SMALL_LETTER_N", Character = 'n' } },
        { 'o', new NamedCharacter { Name = "SMALL_LETTER_O", Character = 'o' } },
        { 'p', new NamedCharacter { Name = "SMALL_LETTER_P", Character = 'p' } },
        { 'q', new NamedCharacter { Name = "SMALL_LETTER_Q", Character = 'q' } },
        { 'r', new NamedCharacter { Name = "SMALL_LETTER_R", Character = 'r' } },
        { 's', new NamedCharacter { Name = "SMALL_LETTER_S", Character = 's' } },
        { 't', new NamedCharacter { Name = "SMALL_LETTER_T", Character = 't' } },
        { 'u', new NamedCharacter { Name = "SMALL_LETTER_U", Character = 'u' } },
        { 'v', new NamedCharacter { Name = "SMALL_LETTER_V", Character = 'v' } },
        { 'w', new NamedCharacter { Name = "SMALL_LETTER_W", Character = 'w' } },
        { 'x', new NamedCharacter { Name = "SMALL_LETTER_X", Character = 'x' } },
        { 'y', new NamedCharacter { Name = "SMALL_LETTER_Y", Character = 'y' } },
        { 'z', new NamedCharacter { Name = "SMALL_LETTER_Z", Character = 'z' } },

        // digits and unshifted punctuation
        { '`', new NamedCharacter { Name = "GRAVE_ACCENT", Character = '`' } },
        { '1', new NamedCharacter { Name = "DIGIT_ONE", Character = '1' } },
        { '2', new NamedCharacter { Name = "DIGIT_TWO", Character = '2' } },
        { '3', new NamedCharacter { Name = "DIGIT_THREE", Character = '3' } },
        { '4', new NamedCharacter { Name = "DIGIT_FOUR", Character = '4' } },
        { '5', new NamedCharacter { Name = "DIGIT_FIVE", Character = '5' } },
        { '6', new NamedCharacter { Name = "DIGIT_SIX", Character = '6' } },
        { '7', new NamedCharacter { Name = "DIGIT_SEVEN", Character = '7' } },
        { '8', new NamedCharacter { Name = "DIGIT_EIGHT", Character = '8' } },
        { '9', new NamedCharacter { Name = "DIGIT_NINE", Character = '9' } },
        { '0', new NamedCharacter { Name = "DIGIT_ZERO", Character = '0' } },
        { '-', new NamedCharacter { Name = "HYPHEN_MINUS", Character = '-' } },
        { '=', new NamedCharacter { Name = "EQUALS_SIGN", Character = '=' } },
        { '[', new NamedCharacter { Name = "LEFT_SQUARE_BRACKET", Character = '[' } },
        { ']', new NamedCharacter { Name = "RIGHT_SQUARE_BRACKET", Character = ']' } },
        { '\\', new NamedCharacter { Name = "REVERSE_SOLIDUS", Character = '\\' } },
        { ';', new NamedCharacter { Name = "SEMICOLON", Character = ';' } },
        { '\'', new NamedCharacter { Name = "APOSTROPHE", Character = '\'' } },
        { ',', new NamedCharacter { Name = "COMMA", Character = ',' } },
        { '.', new NamedCharacter { Name = "FULL_STOP", Character = '.' } },
        { '/', new NamedCharacter { Name = "SOLIDUS", Character = '/' } },
        { '~', new NamedCharacter { Name = "TILDE", Character = '~' } },

        // shifted punctuation
        { '!', new NamedCharacter { Name = "EXCLAMATION_MARK", Character = '!' } },
        { '@', new NamedCharacter { Name = "COMMERCIAL_AT", Character = '@' } },
        { '#', new NamedCharacter { Name = "NUMBER_SIGN", Character = '#' } },
        { '$', new NamedCharacter { Name = "DOLLAR_SIGN", Character = '$' } },
        { '%', new NamedCharacter { Name = "PERCENT_SIGN", Character = '%' } },
        { '^', new NamedCharacter { Name = "CIRCUMFLEX_ACCENT", Character = '^' } },
        { '&', new NamedCharacter { Name = "AMPERSAND", Character = '&' } },
        { '*', new NamedCharacter { Name = "ASTERISK", Character = '*' } },
        { '(', new NamedCharacter { Name = "LEFT_PARENTHESIS", Character = '(' } },
        { ')', new NamedCharacter { Name = "RIGHT_PARENTHESIS", Character = ')' } },
        { '_', new NamedCharacter { Name = "LOW_LINE", Character = '_' } },
        { '+', new NamedCharacter { Name = "PLUS_SIGN", Character = '+' } },
        { '{', new NamedCharacter { Name = "LEFT_CURLY_BRACKET", Character = '{' } },
        { '}', new NamedCharacter { Name = "RIGHT_CURLY_BRACKET", Character = '}' } },
        { '|', new NamedCharacter { Name = "VERTICAL_LINE", Character = '|' } },
        { ':', new NamedCharacter { Name = "COLON", Character = ':' } },
        { '\"', new NamedCharacter { Name = "QUOTATION_MARK", Character = '\"' } },
        { '<', new NamedCharacter { Name = "LESS_THAN_SIGN", Character = '<' } },
        { '>', new NamedCharacter { Name = "GREATER_THAN_SIGN", Character = '>' } },
        { '?', new NamedCharacter { Name = "QUESTION_MARK", Character = '?' } },

        { ' ', new NamedCharacter { Name = "SPACE", Character = ' ' } },

        // whitespace control characters
        { '\u0009', new NamedCharacter { Name = "TAB", Character = '\u0009' } },
        { '\u000D', new NamedCharacter { Name = "CARRIAGE_RETURN", Character = '\u000D' } },
        { '\u000A', new NamedCharacter { Name = "LINE_FEED", Character = '\u000A' } },
    };

/// <summary>
/// Tells whether <paramref name="character"/> has an entry in
/// <c>KnownCharacters</c>.
/// </summary>
/// <param name="character">The character to test.</param>
/// <returns>True when the table contains the character as a key.</returns>
public static bool IsKnownCharacter(char character)
{
    bool hasEntry = KnownCharacters.ContainsKey(character);

    return hasEntry;
}

/// <summary>
/// Resolves <paramref name="character"/> to a <see cref="NamedCharacter"/>.
/// Characters present in <c>KnownCharacters</c> yield the table entry; any
/// other character gets a freshly built entry named "UNICODE_" followed by
/// the character's code as at least four upper-case hex digits.
/// </summary>
/// <param name="character">The character to resolve.</param>
/// <returns>The table entry, or a new generated entry when none exists.</returns>
public static NamedCharacter GetNamedCharacter(char character)
{
    NamedCharacter known;

    // table hit: hand back the shared entry as-is
    if (KnownCharacters.TryGetValue(character, out known))
    {
        return known;
    }

    // table miss: synthesize a name from the character code
    int code = (int)character;

    var generated = new NamedCharacter();
    generated.Name = "UNICODE_" + code.ToString("X4");
    generated.Character = character;

    return generated;
}

} // class
} // namespace
}
}
Loading

0 comments on commit 06143f1

Please sign in to comment.