diff --git a/MarkdownSpec.md b/MarkdownSpec.md
index 886e99c95..adeb07b65 100644
--- a/MarkdownSpec.md
+++ b/MarkdownSpec.md
@@ -70,4 +70,14 @@ __Непарные_ символы в рамках одного абзаца н

 превратится в:

-\<h1>Заголовок \<strong>с \<em>разными\</em> символами\</strong>\</h1>
\ No newline at end of file
+\<h1>Заголовок \<strong>с \<em>разными\</em> символами\</strong>\</h1>
+ + + +# Ссылка + +Текст, оформленный в виде [текст ссылки](URL), должен превращаться в HTML-тег вот так: +[пример ссылки](https://example.com) + +превратится в: +пример ссылки \ No newline at end of file diff --git a/cs/.dockerignore b/cs/.dockerignore new file mode 100644 index 000000000..cd967fc3a --- /dev/null +++ b/cs/.dockerignore @@ -0,0 +1,25 @@ +**/.dockerignore +**/.env +**/.git +**/.gitignore +**/.project +**/.settings +**/.toolstarget +**/.vs +**/.vscode +**/.idea +**/*.*proj.user +**/*.dbmdl +**/*.jfm +**/azds.yaml +**/bin +**/charts +**/docker-compose* +**/Dockerfile* +**/node_modules +**/npm-debug.log +**/obj +**/secrets.dev.yaml +**/values.dev.yaml +LICENSE +README.md \ No newline at end of file diff --git a/cs/MarkDownTest/Extension/TokenExtension.cs b/cs/MarkDownTest/Extension/TokenExtension.cs new file mode 100644 index 000000000..e1db39e1a --- /dev/null +++ b/cs/MarkDownTest/Extension/TokenExtension.cs @@ -0,0 +1,14 @@ +using FluentAssertions; +using FluentAssertions.Collections; +using Markdown.Token; + +namespace MarkDownTest.Extension; + +public static class TokenExtension +{ + public static void AssertTokensEqual(this IEnumerable actual, IEnumerable expected) + { + actual.Should().BeEquivalentTo(expected, + options => options.WithStrictOrdering()); + } +} \ No newline at end of file diff --git a/cs/MarkDownTest/MarkDownConverterTest.cs b/cs/MarkDownTest/MarkDownConverterTest.cs new file mode 100644 index 000000000..2839af090 --- /dev/null +++ b/cs/MarkDownTest/MarkDownConverterTest.cs @@ -0,0 +1,193 @@ +using FluentAssertions; +using Markdown; +using Markdown.interfaces; +using Markdown.MarkDownConverter; +using Markdown.Token; +using NUnit.Framework; + +namespace MarkDownTest; + +public class MarkdownConverterTests +{ + private IMarkdownConverter _converter; + + [SetUp] + public void Setup() + { + _converter = new MarkdownConverter(); + } + + [Test] + public void Convert_TextOnly_ShouldReturnPlainText() + { + var tokens = new List + { + 
Token.CreateText("Hello, World!", 0) + }; + + var result = _converter.Convert(tokens); + + result.Should().Be("Hello, World!"); + } + + [Test] + public void Convert_StrongText_ShouldReturnStrongHtml() + { + var tokens = new List + { + Token.CreateStrong(true, 0), + Token.CreateText("bold", 2), + Token.CreateStrong(false, 6) + }; + + var result = _converter.Convert(tokens); + + result.Should().Be("bold"); + } + + [Test] + public void Convert_ItalicText_ShouldReturnItalicHtml() + { + var tokens = new List + { + Token.CreateItalic(true, 0), + Token.CreateText("italic", 1), + Token.CreateItalic(false, 7) + }; + + var result = _converter.Convert(tokens); + + result.Should().Be("italic"); + } + + [Test] + public void Convert_StrongAndItalicNested_ShouldReturnNestedHtml() + { + var tokens = new List + { + Token.CreateStrong(true, 0), + Token.CreateText("bold ", 2), + Token.CreateItalic(true, 7), + Token.CreateText("italic", 8), + Token.CreateItalic(false, 14), + Token.CreateText(" bold", 15), + Token.CreateStrong(false, 20) + }; + + var result = _converter.Convert(tokens); + + result.Should().Be("bold italic bold"); + } + + [Test] + public void Convert_Header_ShouldReturnHeaderHtml() + { + var tokens = new List + { + Token.CreateHeader(2, 0), + Token.CreateText("Header Text", 2), + new Token("", TokenType.Header, TagState.Close, 13, 2) + }; + + var result = _converter.Convert(tokens); + + result.Should().Be("

Header Text

"); + } + + [Test] + public void Convert_HeaderWithFormatting_ShouldReturnFormattedHeaderHtml() + { + var tokens = new List + { + Token.CreateHeader(1, 0), + Token.CreateText("Welcome to ", 1), + Token.CreateItalic(true, 12), + Token.CreateText("Markdown", 13), + Token.CreateItalic(false, 21), + new Token("", TokenType.Header, TagState.Close, 22, 1) + }; + + var result = _converter.Convert(tokens); + + result.Should().Be("

Welcome to Markdown

"); + } + + [Test] + public void Convert_IncorrectClosingTag_ShouldAddAsText() + { + var tokens = new List + { + Token.CreateStrong(true, 0), + Token.CreateText("bold ", 2), + Token.CreateItalic(true, 7), + Token.CreateText("italic", 8), + Token.CreateStrong(false, 14), + Token.CreateText(" text_", 16) + }; + + var result = _converter.Convert(tokens); + + + result.Should().Be("bold italic__ text_"); + } + + [Test] + public void Convert_UnmatchedClosingTag_ShouldAddAsText() + { + var tokens = new List + { + Token.CreateItalic(false, 0), + Token.CreateText("text_", 1) + }; + + var result = _converter.Convert(tokens); + + result.Should().Be("_text_"); + } + + [Test] + public void Convert_UnclosedTags_ShouldCloseRemainingTags() + { + var tokens = new List + { + Token.CreateStrong(true, 0), + Token.CreateText("bold", 2) + }; + + var result = _converter.Convert(tokens); + + result.Should().Be("bold"); + } + + [Test] + public void Convert_Link_ShouldReturnLinkHtml() + { + var tokens = new List + { + Token.CreateLink("пример ссылки", "https://example.com", 0) + }; + + var result = _converter.Convert(tokens); + + result.Should().Be("пример ссылки"); + } + + + [Test] + public void Convert_StrongTextWithLink_ShouldReturnCorrectHtml() + { + var tokens = new List + { + Token.CreateStrong(true, 0), + Token.CreateText("Посетите ", 2), + Token.CreateLink("сайт", "https://example.com", 11), + Token.CreateStrong(false, 31) + }; + + var result = _converter.Convert(tokens); + + result.Should().Be("Посетите сайт"); + } + + +} \ No newline at end of file diff --git a/cs/MarkDownTest/MarkDownTest.csproj b/cs/MarkDownTest/MarkDownTest.csproj new file mode 100644 index 000000000..f389e53f0 --- /dev/null +++ b/cs/MarkDownTest/MarkDownTest.csproj @@ -0,0 +1,25 @@ + + + + net8.0 + enable + enable + + false + true + + + + + + + + + + + + + + + + diff --git a/cs/MarkDownTest/MarkdownParserTests.cs b/cs/MarkDownTest/MarkdownParserTests.cs new file mode 100644 index 000000000..ea1330979 --- 
/dev/null +++ b/cs/MarkDownTest/MarkdownParserTests.cs @@ -0,0 +1,382 @@ +using FluentAssertions; +using Markdown; +using Markdown.Parser; +using Markdown.Parser.Interface; +using Markdown.Token; +using MarkDownTest.Extension; +using NUnit.Framework; + +namespace MarkDownTest; + +public class MarkdownParserTests +{ + private IMarkdownParser _parser; + + [SetUp] + public void Setup() + { + _parser = new MarkdownParser(); + } + + [Test] + public void Parse_PlainText_ReturnsTextToken() + { + var input = "Simple text"; + var expectedTokens = new[] + { + Token.CreateText("Simple text", 0) + }; + + var tokens = _parser.Parse(input); + + tokens.AssertTokensEqual(expectedTokens); + } + + [Test] + public void Parse_TextWithSymbols_ReturnsTextToken() + { + var input = "Text with symbols !@#$%^&*()"; + var expectedTokens = new[] + { + Token.CreateText("Text with symbols !@#$%^&*()", 0) + }; + + var tokens = _parser.Parse(input); + + tokens.AssertTokensEqual(expectedTokens); + } + + [TestCase("# Header", 1)] + [TestCase("## Header", 2)] + [TestCase("###### Header", 6)] + public void Parse_Header_ReturnsHeaderAndTextTokens(string input, int expectedLevel) + { + var expectedTokens = new[] + { + Token.CreateHeader(expectedLevel, 0), + Token.CreateText("Header", expectedLevel + 1) + }; + + var tokens = _parser.Parse(input); + + tokens.AssertTokensEqual(expectedTokens); + } + + [TestCase("#Header")] + [TestCase("####### Header")] + [TestCase("Text# Header")] + public void Parse_InvalidHeader_ReturnsSingleTextToken(string input) + { + var expectedTokens = new[] + { + Token.CreateText(input, 0) + }; + + var tokens = _parser.Parse(input); + + tokens.AssertTokensEqual(expectedTokens); + } + + [Test] + public void Parse_StrongEmphasis_ReturnsCorrectTokens() + { + var input = "__bold__"; + var expectedTokens = new[] + { + Token.CreateStrong(true, 0), + Token.CreateText("bold", 2), + Token.CreateStrong(false, 6) + }; + + var tokens = _parser.Parse(input); + + 
tokens.AssertTokensEqual(expectedTokens); + } + + [Test] + public void Parse_ItalicEmphasis_ReturnsCorrectTokens() + { + var input = "_italic_"; + var expectedTokens = new[] + { + Token.CreateItalic(true, 0), + Token.CreateText("italic", 1), + Token.CreateItalic(false, 7) + }; + + var tokens = _parser.Parse(input); + + tokens.AssertTokensEqual(expectedTokens); + } + + [Test] + public void Parse_NestedEmphasis_ReturnsCorrectTokens() + { + var input = "__bold _italic_ text__"; + var expectedTokens = new[] + { + Token.CreateStrong(true, 0), + Token.CreateText("bold ", 2), + Token.CreateItalic(true, 7), + Token.CreateText("italic", 8), + Token.CreateItalic(false, 14), + Token.CreateText(" text", 15), + Token.CreateStrong(false, 20) + }; + + var tokens = _parser.Parse(input); + + tokens.AssertTokensEqual(expectedTokens); + } + + [Test] + public void Parse_NestedTags_ClosesInCorrectOrder() + { + var input = "__bold italic__ text_"; + var expectedTokens = new[] + { + Token.CreateStrong(true, 0), + Token.CreateText("bold italic", 2), + Token.CreateStrong(false, 13), + Token.CreateText(" text_", 15), + }; + + var tokens = _parser.Parse(input); + + tokens.AssertTokensEqual(expectedTokens); + } + + [Test] + public void Parse_ComplexMarkdown_ReturnsCorrectTokenSequence() + { + var input = "# Header\n__bold _italic_ text__"; + var expectedTokens = new[] + { + Token.CreateHeader(1, 0), + Token.CreateText("Header\n", 2), + Token.CreateStrong(true, 9), + Token.CreateText("bold ", 11), + Token.CreateItalic(true, 16), + Token.CreateText("italic", 17), + Token.CreateItalic(false, 23), + Token.CreateText(" text", 24), + Token.CreateStrong(false, 29) + }; + + var tokens = _parser.Parse(input); + + tokens.AssertTokensEqual(expectedTokens); + } + + [Test] + public void Parse_NestedTags_HandlesNestedStrongAndItalic() + { + var input = "__bold _italic_ bold__"; + var expectedTokens = new[] + { + Token.CreateStrong(true, 0), + Token.CreateText("bold ", 2), + Token.CreateItalic(true, 7), + 
Token.CreateText("italic", 8), + Token.CreateItalic(false, 14), + Token.CreateText(" bold", 15), + Token.CreateStrong(false, 20), + }; + + var tokens = _parser.Parse(input); + + tokens.AssertTokensEqual(expectedTokens); + } + + [TestCase("")] + [TestCase(" ")] + [TestCase("\n")] + [TestCase(null)] + public void Parse_MinimalInput_ReturnsTextToken(string input) + { + var expectedTokens = new[] { Token.CreateText(input, 0) }; + if (input is null || input.Length == 0) + { + expectedTokens = Array.Empty(); + } + + var tokens = _parser.Parse(input); + + tokens.AssertTokensEqual(expectedTokens); + } + + [Test] + public void Parse_MixedContent_PreservesWhitespaceInTextTokens() + { + var input = "Text __with spaces__ here"; + var expectedTokens = new[] + { + Token.CreateText("Text ", 0), + Token.CreateStrong(true, 6), + Token.CreateText("with spaces", 8), + Token.CreateStrong(false, 20), + Token.CreateText(" here", 22) + }; + + var tokens = _parser.Parse(input); + + tokens.AssertTokensEqual(expectedTokens); + } + + [Test] + public void Parse_Link_ReturnsLinkToken() + { + var input = "[пример ссылки](https://example.com)"; + var expectedTokens = new[] + { + Token.CreateLink("пример ссылки", "https://example.com", 0) + }; + + var tokens = _parser.Parse(input); + + tokens.AssertTokensEqual(expectedTokens); + } + + + [Test] + public void Parse_StrongTextWithLink_ReturnsCorrectTokens() + { + var input = "_Посетите [сайт](https://example.com)_"; + var expectedTokens = new[] + { + Token.CreateItalic(true, 0), + Token.CreateText("Посетите ", 1), + Token.CreateLink("сайт", "https://example.com", 10), + Token.CreateItalic(false, 37) + }; + + var tokens = _parser.Parse(input); + + tokens.AssertTokensEqual(expectedTokens); + } + + [Test] + public void Parse_WithIncorrectItalicSpacing_ReturnsTextAsIs() + { + var input = "_подчерки _не считаются_"; + var expectedTokens = new[] + { + Token.CreateText("_подчерки ", 0), + Token.CreateItalic(true, 10), + Token.CreateText("не считаются", 
11), + Token.CreateItalic(false, 23) + }; + + var tokens = _parser.Parse(input); + + tokens.AssertTokensEqual(expectedTokens); + } + + [Test] + public void Parse_WithUnderscoresWithinWords_NoFormattingApplied() + { + var input = "ра_зных сл_овах"; + var expectedTokens = new[] + { + Token.CreateText("ра_зных сл_овах", 0) + }; + + var tokens = _parser.Parse(input); + + + tokens.AssertTokensEqual(expectedTokens); + } + + [Test] + public void Parse_WithTrailingUnderscoresAfterWords_NoFormattingApplied() + { + var input = "эти_ подчерки_ не должны работать"; + var expectedTokens = new[] + { + Token.CreateText("эти_ подчерки_ не должны работать", 0) + }; + + var tokens = _parser.Parse(input); + + + tokens.AssertTokensEqual(expectedTokens); + } + + [Test] + public void Parse_WithUnpairedUnderscore_ReturnsTextAsIs() + { + var input = "Непарные_ символы"; + var expectedTokens = new[] + { + Token.CreateText("Непарные_ символы", 0) + }; + + var tokens = _parser.Parse(input); + + + tokens.AssertTokensEqual(expectedTokens); + } + + [Test] + public void Parse_WithUnderscoresAroundNumbers_NoFormattingApplied() + { + var input = "цифрами_12_3"; + var expectedTokens = new[] + { + Token.CreateText("цифрами_12_3", 0) + }; + + var tokens = _parser.Parse(input); + + + tokens.AssertTokensEqual(expectedTokens); + } + + [Test] + public void Parse_WithMultipleConsecutiveUnderscores_ReturnsTextAsIs() + { + var input = "____"; + var expectedTokens = new[] + { + Token.CreateText("____", 0) + }; + + var tokens = _parser.Parse(input); + + + tokens.AssertTokensEqual(expectedTokens); + } + + + [Test] + public void Parse_WithEscapedCharacters_ReturnsTextWithEscapedSymbols() + { + var input = @"\_не_подчеркивается\_"; + var expectedTokens = new[] + { + Token.CreateText("_", 0), + Token.CreateText("не_подчеркивается", 2), + Token.CreateText("_", 19), + }; + + var tokens = _parser.Parse(input); + + + tokens.AssertTokensEqual(expectedTokens); + } + + [Test] + public void 
Parse_WithLineBreaksBreakingControlCharacters_ReturnsTextAsIs() + { + var input = "Это пример с разрывом _подчеркивания\nна новой строке_"; + var expectedTokens = new[] + { + Token.CreateText("Это пример с разрывом _подчеркивания\nна новой строке_", 0) + }; + + var tokens = _parser.Parse(input); + + + tokens.AssertTokensEqual(expectedTokens); + } +} \ No newline at end of file diff --git a/cs/Markdown/MarkDown.cs b/cs/Markdown/MarkDown.cs new file mode 100644 index 000000000..2cd44ff9f --- /dev/null +++ b/cs/Markdown/MarkDown.cs @@ -0,0 +1,18 @@ +using Markdown.interfaces; +using Markdown.MarkDownConverter; +using Markdown.Parser; +using Markdown.Parser.Interface; + +namespace Markdown; + +public class MarkDown +{ + private readonly IMarkdownConverter _converter = new MarkdownConverter(); + private readonly IMarkdownParser _parser = new MarkdownParser(); + + public string Render(string markdown) + { + var tokens = _parser.Parse(markdown); + return _converter.Convert(tokens); + } +} \ No newline at end of file diff --git a/cs/Markdown/MarkDownConverter/Interface/IMarkdownConverter.cs b/cs/Markdown/MarkDownConverter/Interface/IMarkdownConverter.cs new file mode 100644 index 000000000..78132ab9b --- /dev/null +++ b/cs/Markdown/MarkDownConverter/Interface/IMarkdownConverter.cs @@ -0,0 +1,6 @@ +namespace Markdown.interfaces; + +public interface IMarkdownConverter +{ + string Convert(IEnumerable tokens); +} \ No newline at end of file diff --git a/cs/Markdown/MarkDownConverter/MarkdownConverter.cs b/cs/Markdown/MarkDownConverter/MarkdownConverter.cs new file mode 100644 index 000000000..6d5b9df42 --- /dev/null +++ b/cs/Markdown/MarkDownConverter/MarkdownConverter.cs @@ -0,0 +1,61 @@ +using System.Text; +using Markdown.interfaces; +using Markdown.MarkDownConverter.TagConverters; +using Markdown.Token; + +namespace Markdown.MarkDownConverter; + +public class MarkdownConverter : IMarkdownConverter +{ + private readonly IList tagConverters; + + public MarkdownConverter() + { 
+ tagConverters = new List + { + new TextConverter(), + new StrongConverter(), + new ItalicConverter(), + new HeaderConverter(), + new LinkConverter() + }; + } + + public string Convert(IEnumerable tokens) + { + var result = new StringBuilder(); + var tagStack = new Stack(); + + foreach (var token in tokens) + { + var converter = tagConverters.FirstOrDefault(c => c.CanHandle(token.Type)); + if (converter != null) + { + converter.Handle(token, tagStack, result); + } + else + { + result.Append(System.Net.WebUtility.HtmlEncode(token.Text)); + } + } + + while (tagStack.Count > 0) + { + var openTag = tagStack.Pop(); + result.Append(GetClosingTag(openTag)); + } + + return result.ToString(); + } + + private string GetClosingTag(TokenType type) + { + return type switch + { + TokenType.Strong => "", + TokenType.Italic => "", + TokenType.Header => "", + _ => string.Empty + }; + } +} \ No newline at end of file diff --git a/cs/Markdown/MarkDownConverter/TagConverters/HeaderConverter.cs b/cs/Markdown/MarkDownConverter/TagConverters/HeaderConverter.cs new file mode 100644 index 000000000..bf554fd59 --- /dev/null +++ b/cs/Markdown/MarkDownConverter/TagConverters/HeaderConverter.cs @@ -0,0 +1,30 @@ +using System.Text; +using Markdown.Token; + +namespace Markdown.MarkDownConverter.TagConverters; + +public class HeaderConverter : ITagConverter +{ + public bool CanHandle(TokenType type) => type == TokenType.Header; + + public void Handle(Token.Token token, Stack tagStack, StringBuilder result) + { + if (token.State == TagState.Open) + { + result.Append($""); + tagStack.Push(token.Type); + } + else + { + if (tagStack.Count > 0 && tagStack.Peek() == token.Type) + { + result.Append($""); + tagStack.Pop(); + } + else + { + result.Append(System.Net.WebUtility.HtmlEncode(token.Text)); + } + } + } +} \ No newline at end of file diff --git a/cs/Markdown/MarkDownConverter/TagConverters/ITagConverter.cs b/cs/Markdown/MarkDownConverter/TagConverters/ITagConverter.cs new file mode 100644 index 
000000000..ef429bce0 --- /dev/null +++ b/cs/Markdown/MarkDownConverter/TagConverters/ITagConverter.cs @@ -0,0 +1,10 @@ +using System.Text; +using Markdown.Token; + +namespace Markdown.MarkDownConverter.TagConverters; + +public interface ITagConverter +{ + bool CanHandle(TokenType type); + void Handle(Token.Token token, Stack tagStack, StringBuilder result); +} \ No newline at end of file diff --git a/cs/Markdown/MarkDownConverter/TagConverters/ItalicConverter.cs b/cs/Markdown/MarkDownConverter/TagConverters/ItalicConverter.cs new file mode 100644 index 000000000..ac42979ad --- /dev/null +++ b/cs/Markdown/MarkDownConverter/TagConverters/ItalicConverter.cs @@ -0,0 +1,30 @@ +using System.Text; +using Markdown.Token; + +namespace Markdown.MarkDownConverter.TagConverters; + +public class ItalicConverter : ITagConverter +{ + public bool CanHandle(TokenType type) => type == TokenType.Italic; + + public void Handle(Token.Token token, Stack tagStack, StringBuilder result) + { + if (token.State == TagState.Open) + { + result.Append(""); + tagStack.Push(token.Type); + } + else + { + if (tagStack.Count > 0 && tagStack.Peek() == token.Type) + { + result.Append(""); + tagStack.Pop(); + } + else + { + result.Append(System.Net.WebUtility.HtmlEncode(token.Text)); + } + } + } +} \ No newline at end of file diff --git a/cs/Markdown/MarkDownConverter/TagConverters/LinkCoverter.cs b/cs/Markdown/MarkDownConverter/TagConverters/LinkCoverter.cs new file mode 100644 index 000000000..25cbf6fb5 --- /dev/null +++ b/cs/Markdown/MarkDownConverter/TagConverters/LinkCoverter.cs @@ -0,0 +1,14 @@ +using System.Text; +using Markdown.Token; + +namespace Markdown.MarkDownConverter.TagConverters; + +public class LinkConverter : ITagConverter +{ + public bool CanHandle(TokenType type) => type == TokenType.Link; + + public void Handle(Token.Token token, Stack tagStack, StringBuilder result) + { + result.Append($"{System.Net.WebUtility.HtmlEncode(token.Text)}"); + } +} \ No newline at end of file diff 
--git a/cs/Markdown/MarkDownConverter/TagConverters/StrongConverter.cs b/cs/Markdown/MarkDownConverter/TagConverters/StrongConverter.cs new file mode 100644 index 000000000..d1f842f79 --- /dev/null +++ b/cs/Markdown/MarkDownConverter/TagConverters/StrongConverter.cs @@ -0,0 +1,30 @@ +using System.Text; +using Markdown.Token; + +namespace Markdown.MarkDownConverter.TagConverters; + +public class StrongConverter : ITagConverter +{ + public bool CanHandle(TokenType type) => type == TokenType.Strong; + + public void Handle(Token.Token token, Stack tagStack, StringBuilder result) + { + if (token.State == TagState.Open) + { + result.Append(""); + tagStack.Push(token.Type); + } + else + { + if (tagStack.Count > 0 && tagStack.Peek() == token.Type) + { + result.Append(""); + tagStack.Pop(); + } + else + { + result.Append(System.Net.WebUtility.HtmlEncode(token.Text)); + } + } + } +} \ No newline at end of file diff --git a/cs/Markdown/MarkDownConverter/TagConverters/TextConverter.cs b/cs/Markdown/MarkDownConverter/TagConverters/TextConverter.cs new file mode 100644 index 000000000..7f807a0b3 --- /dev/null +++ b/cs/Markdown/MarkDownConverter/TagConverters/TextConverter.cs @@ -0,0 +1,14 @@ +using System.Text; +using Markdown.Token; + +namespace Markdown.MarkDownConverter.TagConverters; + +public class TextConverter : ITagConverter +{ + public bool CanHandle(TokenType type) => type == TokenType.Text; + + public void Handle(Token.Token token, Stack tagStack, StringBuilder result) + { + result.Append(System.Net.WebUtility.HtmlEncode(token.Text)); + } +} \ No newline at end of file diff --git a/cs/Markdown/Markdown.csproj b/cs/Markdown/Markdown.csproj new file mode 100644 index 000000000..e407cdc01 --- /dev/null +++ b/cs/Markdown/Markdown.csproj @@ -0,0 +1,11 @@ + + + + Exe + net8.0 + enable + enable + Linux + + + diff --git a/cs/Markdown/Parser/Interface/IMarkdownParser.cs b/cs/Markdown/Parser/Interface/IMarkdownParser.cs new file mode 100644 index 000000000..4d082200c --- 
/dev/null +++ b/cs/Markdown/Parser/Interface/IMarkdownParser.cs @@ -0,0 +1,6 @@ +namespace Markdown.Parser.Interface; + +public interface IMarkdownParser +{ + IEnumerable Parse(string markdownText); +} diff --git a/cs/Markdown/Parser/Interface/ITokenHandler.cs b/cs/Markdown/Parser/Interface/ITokenHandler.cs new file mode 100644 index 000000000..ee8196a52 --- /dev/null +++ b/cs/Markdown/Parser/Interface/ITokenHandler.cs @@ -0,0 +1,8 @@ +using Markdown.Token; + +namespace Markdown.Parser.Interface; + +public interface ITokenHandler +{ + bool TryHandle(ParsingContext context, out Token.Token token, out int skip); +} \ No newline at end of file diff --git a/cs/Markdown/Parser/MarkdownParser.cs b/cs/Markdown/Parser/MarkdownParser.cs new file mode 100644 index 000000000..63c62f6b1 --- /dev/null +++ b/cs/Markdown/Parser/MarkdownParser.cs @@ -0,0 +1,72 @@ + + +using System.Text; +using Markdown.Parser.Interface; +using Markdown.Parser.TokenHandler; +using Markdown.Token; + +namespace Markdown.Parser; + +public class MarkdownParser : IMarkdownParser +{ + private readonly IList handlers; + + public MarkdownParser() + { + handlers = TokenHandlerFactory.CreateHandlers(); + } + + public IEnumerable Parse(string text) + { + var tokens = new List(); + var openTags = new Stack(); + var textBuffer = new StringBuilder(); + var textStart = 0; + var position = 0; + + if (text is null) + { + return tokens; + } + + while (position < text.Length) + { + var context = new ParsingContext(text, position, openTags); + var handled = false; + + foreach (var handler in handlers) + { + if (!handler.TryHandle(context, out var token, out var skip)) + { + continue; + } + + if (textBuffer.Length > 0) + { + tokens.Add(Token.Token.CreateText(textBuffer.ToString(), textStart)); + textBuffer.Clear(); + } + + tokens.Add(token); + position += skip; + handled = true; + break; + } + + if (handled) + { + continue; + } + + if (textBuffer.Length == 0) + textStart = position; + textBuffer.Append(text[position]); 
+            position++;
+        }
+
+        if (textBuffer.Length > 0)
+            tokens.Add(Token.Token.CreateText(textBuffer.ToString(), textStart));
+
+        return tokens;
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Parser/TokenHandler/Handlers/BaseHandler.cs b/cs/Markdown/Parser/TokenHandler/Handlers/BaseHandler.cs
new file mode 100644
index 000000000..613a3f9c7
--- /dev/null
+++ b/cs/Markdown/Parser/TokenHandler/Handlers/BaseHandler.cs
@@ -0,0 +1,45 @@
+using Markdown.Parser.Interface;
+using Markdown.Token;
+
+namespace Markdown.Parser.TokenHandler.Handlers;
+
+/// <summary>
+/// Base class for token handlers that are configured with a delimiter description.
+/// </summary>
+public abstract class BaseTokenHandler : ITokenHandler
+{
+    /// <summary>Delimiter supplied by the derived handler.</summary>
+    protected readonly Delimiter Delimiter;
+
+    /// <summary>Stores the delimiter for the derived handler.</summary>
+    /// <param name="delimiter">Opening/closing markers and the token type they map to.</param>
+    protected BaseTokenHandler(Delimiter delimiter)
+    {
+        Delimiter = delimiter;
+    }
+
+    /// <summary>Tries to recognise a token at the position described by <paramref name="context"/>.</summary>
+    /// <param name="context">Parsing state handed to the handler by the parser.</param>
+    /// <param name="token">Recognised token when the method returns <c>true</c>.</param>
+    /// <param name="skip">Number of characters consumed when the method returns <c>true</c>.</param>
+    /// <returns><c>true</c> if a token was recognised; otherwise <c>false</c>.</returns>
+    public abstract bool TryHandle(ParsingContext context, out Token.Token token, out int skip);
+
+    /// <summary>
+    /// Checks whether <paramref name="pattern"/> occurs in <paramref name="text"/> exactly at
+    /// <paramref name="position"/> (ordinal comparison).
+    /// </summary>
+    /// <param name="text">Text being parsed.</param>
+    /// <param name="position">Index in <paramref name="text"/> where the pattern must start.</param>
+    /// <param name="pattern">Character sequence to look for.</param>
+    /// <returns><c>true</c> on an exact match; <c>false</c> if the pattern does not fit or differs.</returns>
+    protected bool IsMatch(string text, int position, string pattern)
+    {
+        // A position outside the text cannot match; report that instead of throwing.
+        if (position < 0 || position + pattern.Length > text.Length)
+            return false;
+
+        // Compare in place rather than allocating a substring for every probe.
+        return string.CompareOrdinal(text, position, pattern, 0, pattern.Length) == 0;
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Parser/TokenHandler/Handlers/Delimiter.cs b/cs/Markdown/Parser/TokenHandler/Handlers/Delimiter.cs
new file mode 100644
index 000000000..f4e61c3f9
--- /dev/null
+++ b/cs/Markdown/Parser/TokenHandler/Handlers/Delimiter.cs
@@ -0,0 +1,27 @@
+using Markdown.Token;
+
+namespace Markdown.Parser.TokenHandler;
+
+/// <summary>
+/// Immutable description of a markup delimiter: its opening and closing markers
+/// and the token type associated with them.
+/// </summary>
+public class Delimiter
+{
+    /// <summary>Marker that opens the construct.</summary>
+    public string Opening { get; }
+
+    /// <summary>Marker that closes the construct.</summary>
+    public string Closing { get; }
+
+    /// <summary>Token type associated with this delimiter.</summary>
+    public TokenType Type { get; }
+
+    /// <summary>Creates a delimiter from its two markers and its token type.</summary>
+    public Delimiter(string opening, string closing, TokenType type)
+    {
+        Opening = opening;
+        Closing = closing;
+        Type = type;
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Parser/TokenHandler/Handlers/EscapedCharacterHandler.cs b/cs/Markdown/Parser/TokenHandler/Handlers/EscapedCharacterHandler.cs
new file mode 100644
index 000000000..94175ccdc
--- /dev/null
+++ b/cs/Markdown/Parser/TokenHandler/Handlers/EscapedCharacterHandler.cs
@@ -0,0 +1,33 @@ 
+using Markdown.Token;
+
+namespace Markdown.Parser.TokenHandler.Handlers;
+
+/// <summary>Turns a backslash plus the character after it into a text token for that character.</summary>
+public class EscapedCharacterHandler : BaseTokenHandler
+{
+    public EscapedCharacterHandler() : base(new Delimiter(@"\", @"\", TokenType.Escaped))
+    {
+    }
+
+    /// <summary>Consumes an escape pair at the current position, if there is one.</summary>
+    /// <param name="context">Parsing state providing the text and the current position.</param>
+    /// <param name="token">Text token holding the escaped character; <c>null</c> on failure.</param>
+    /// <param name="skip">2 on success (backslash and escaped character); otherwise 0.</param>
+    /// <returns><c>true</c> when an escape pair was consumed.</returns>
+    public override bool TryHandle(ParsingContext context, out Token.Token token, out int skip)
+    {
+        token = null;
+        skip = 0;
+
+        var source = context.Text;
+        var escapeIndex = context.Position;
+
+        // A backslash at the very end of the text has nothing to escape.
+        if (!IsMatch(source, escapeIndex, Delimiter.Opening) || escapeIndex + 1 >= source.Length)
+            return false;
+
+        token = Token.Token.CreateText(source[escapeIndex + 1].ToString(), escapeIndex);
+        skip = 2;
+        return true;
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Parser/TokenHandler/Handlers/HeaderHandler.cs b/cs/Markdown/Parser/TokenHandler/Handlers/HeaderHandler.cs
new file mode 100644
index 000000000..850f9f6ae
--- /dev/null
+++ b/cs/Markdown/Parser/TokenHandler/Handlers/HeaderHandler.cs
@@ -0,0 +1,47 @@
+using Markdown.Token;
+
+namespace Markdown.Parser.TokenHandler.Handlers;
+
+/// <summary>
+/// Recognises a header marker: one to six '#' characters followed by a space, where the context reports a start of line.
+/// </summary>
+public class HeaderHandler : BaseTokenHandler
+{
+    private const int MaxLevel = 6;
+
+    public HeaderHandler() : base(new Delimiter("#", "", TokenType.Header))
+    {
+    }
+
+    /// <summary>Emits an opening header token when a valid marker starts at the current position.</summary>
+    /// <param name="context">Parsing state; the header type is pushed onto its open-tag stack on success.</param>
+    /// <param name="token">Opening header token carrying the level; <c>null</c> on failure.</param>
+    /// <param name="skip">Marker length plus the following space on success; otherwise 0.</param>
+    /// <returns><c>true</c> when a header marker was consumed.</returns>
+    public override bool TryHandle(ParsingContext context, out Token.Token token, out int skip)
+    {
+        token = null;
+        skip = 0;
+
+        var source = context.Text;
+        var start = context.Position;
+
+        if (!context.IsStartOfLine || !IsMatch(source, start, Delimiter.Opening))
+            return false;
+
+        // Count the run of '#' characters, stopping at the deepest supported level.
+        var level = 1;
+        while (level < MaxLevel && start + level < source.Length && source[start + level] == '#')
+            level++;
+
+        // The marker only counts when a space follows it; a seventh '#' fails this check too.
+        var afterMarker = start + level;
+        if (afterMarker >= source.Length || source[afterMarker] != ' ')
+            return false;
+
+        context.OpenTags.Push(Delimiter.Type);
+        token = new Token.Token(new string('#', level), TokenType.Header, TagState.Open, start, level);
+        skip = level + 1;
+        return true;
+    }
+}
\ No newline at end of file
diff --git 
a/cs/Markdown/Parser/TokenHandler/Handlers/LinkHandler.cs b/cs/Markdown/Parser/TokenHandler/Handlers/LinkHandler.cs
new file mode 100644
index 000000000..ae3cb4ecf
--- /dev/null
+++ b/cs/Markdown/Parser/TokenHandler/Handlers/LinkHandler.cs
@@ -0,0 +1,46 @@
+using Markdown.Token;
+
+namespace Markdown.Parser.TokenHandler.Handlers;
+
+/// <summary>
+/// Recognises an inline link written as <c>[text](url)</c> and emits a single link token for it.
+/// </summary>
+public class LinkHandler : BaseTokenHandler
+{
+    public LinkHandler() : base(new Delimiter("[", "]", TokenType.Link))
+    {
+    }
+
+    /// <summary>Consumes a complete <c>[text](url)</c> construct starting at the current position.</summary>
+    /// <param name="context">Parsing state providing the text and the current position.</param>
+    /// <param name="token">Link token with the bracketed text and the URL; <c>null</c> on failure.</param>
+    /// <param name="skip">Length of the whole construct on success; otherwise 0.</param>
+    /// <returns><c>true</c> when a link was consumed.</returns>
+    public override bool TryHandle(ParsingContext context, out Token.Token token, out int skip)
+    {
+        token = null;
+        skip = 0;
+
+        var start = context.Position;
+        var source = context.Text;
+        if (source[start] != '[')
+            return false;
+
+        // The first ']' must be immediately followed by '(' for this to be a link.
+        var textEnd = source.IndexOf(']', start);
+        var urlOpen = textEnd + 1;
+        if (textEnd < 0 || urlOpen >= source.Length || source[urlOpen] != '(')
+            return false;
+
+        var urlEnd = source.IndexOf(')', urlOpen);
+        if (urlEnd < 0)
+            return false;
+
+        var linkText = source[(start + 1)..textEnd];
+        var url = source[(urlOpen + 1)..urlEnd];
+
+        token = new Token.Token(linkText, TokenType.Link, TagState.Open, start, url: url);
+        skip = urlEnd - start + 1;
+        return true;
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Parser/TokenHandler/Handlers/PairedTagHandler.cs b/cs/Markdown/Parser/TokenHandler/Handlers/PairedTagHandler.cs
new file mode 100644
index 000000000..01ed79020
--- /dev/null
+++ b/cs/Markdown/Parser/TokenHandler/Handlers/PairedTagHandler.cs
@@ -0,0 +1,142 @@
+using Markdown.Token;
+
+namespace Markdown.Parser.TokenHandler.Handlers
+{
+    public class PairedTagHandler : BaseTokenHandler
+    {
+        public PairedTagHandler(Delimiter delimiter) : base(delimiter)
+        {
+        }
+
+        public override bool TryHandle(ParsingContext context, out Token.Token token, out int skip)
+        {
+            token = null;
+            skip = 0;
+
+ 
var text = context.Text; + var position = context.Position; + + if (!IsOpeningDelimiter(text, position)) + return false; + + if (HasExcessiveDelimiters(text, position)) + return false; + + if (IsClosingDelimiter(context, text, position, out token, out skip)) + return true; + + return IsOpeningPossible(context, text, position, out token, out skip); + } + + private bool IsOpeningDelimiter(string text, int position) + { + return IsMatch(text, position, Delimiter.Opening); + } + + private bool HasExcessiveDelimiters(string text, int position) + { + var delimiterCount = 0; + while (position + delimiterCount < text.Length && + IsMatch(text, position + delimiterCount, Delimiter.Opening)) + { + delimiterCount++; + } + + return delimiterCount > Delimiter.Opening.Length; + } + + private bool IsClosingDelimiter(ParsingContext context, string text, int position, out Token.Token token, out int skip) + { + token = null; + skip = 0; + + var isClosingPossible = context.OpenTags.Count > 0 && context.OpenTags.Peek() == Delimiter.Type; + if (!isClosingPossible) + { + return false; + } + + if (HasLetterOrDigitAfterDelimiter(text, position)) + return false; + + token = CreateClosingToken(position); + context.OpenTags.Pop(); + skip = Delimiter.Opening.Length; + return true; + } + + private bool IsOpeningPossible(ParsingContext context, string text, int position, out Token.Token token, out int skip) + { + token = null; + skip = 0; + + if (!DelimiterOpeningValid(context, text, position)) + return false; + + var closingPos = FindClosingDelimiter(text, position + Delimiter.Opening.Length); + if (closingPos == -1 || HasWhitespaceBeforeClosing(text, closingPos)) + return false; + + var innerText = text.Substring(position + Delimiter.Opening.Length, closingPos - (position + Delimiter.Opening.Length)); + if (innerText.Contains("\n") || innerText.Contains("\r")) + return false; + + token = CreateOpeningToken(position); + context.OpenTags.Push(Delimiter.Type); + skip = Delimiter.Opening.Length; + 
return true; + } + + private bool DelimiterOpeningValid(ParsingContext context, string text, int position) + { + if (Delimiter.Opening.Length != 1) + { + return true; + } + + return !HasLetterBefore(text, position) && !HasWhitespaceAfter(text, position); + } + + private bool HasLetterBefore(string text, int position) + { + return position > 0 && char.IsLetterOrDigit(text[position - 1]); + } + + private bool HasWhitespaceAfter(string text, int position) + { + return position + Delimiter.Opening.Length >= text.Length || + char.IsWhiteSpace(text[position + Delimiter.Opening.Length]); + } + + private bool HasLetterOrDigitAfterDelimiter(string text, int position) + { + return position + Delimiter.Opening.Length < text.Length && + char.IsLetterOrDigit(text[position + Delimiter.Opening.Length]); + } + + private bool HasWhitespaceBeforeClosing(string text, int closingPos) + { + return closingPos > 0 && char.IsWhiteSpace(text[closingPos - 1]); + } + + private Token.Token CreateClosingToken(int position) + { + return new Token.Token(Delimiter.Closing, Delimiter.Type, TagState.Close, position); + } + + private Token.Token CreateOpeningToken(int position) + { + return new Token.Token(Delimiter.Opening, Delimiter.Type, TagState.Open, position); + } + + private int FindClosingDelimiter(string text, int startPos) + { + for (var i = startPos; i <= text.Length - Delimiter.Closing.Length; i++) + { + if (IsMatch(text, i, Delimiter.Closing)) + return i; + } + return -1; + } + } +} \ No newline at end of file diff --git a/cs/Markdown/Parser/TokenHandler/Handlers/TokenHandlerFactory.cs b/cs/Markdown/Parser/TokenHandler/Handlers/TokenHandlerFactory.cs new file mode 100644 index 000000000..702b864c5 --- /dev/null +++ b/cs/Markdown/Parser/TokenHandler/Handlers/TokenHandlerFactory.cs @@ -0,0 +1,27 @@ +using Markdown.Parser.Interface; +using Markdown.Parser.TokenHandler.Handlers; +using Markdown.Token; + +namespace Markdown.Parser.TokenHandler; + +public static class TokenHandlerFactory +{ + 
private static readonly Dictionary Delimiters = new() + { + { TokenType.Strong, new Delimiter("__", "__", TokenType.Strong) }, + { TokenType.Italic, new Delimiter("_", "_", TokenType.Italic) } + }; + + + public static IList CreateHandlers() + { + return new List + { + new EscapedCharacterHandler(), + new LinkHandler(), + new PairedTagHandler(Delimiters[TokenType.Strong]), + new PairedTagHandler(Delimiters[TokenType.Italic]), + new HeaderHandler() + }; + } +} \ No newline at end of file diff --git a/cs/Markdown/Program.cs b/cs/Markdown/Program.cs new file mode 100644 index 000000000..388fa99ec --- /dev/null +++ b/cs/Markdown/Program.cs @@ -0,0 +1,5 @@ +using Markdown; + +var md = new MarkDown(); +var result = md.Render("__bold _italic_ text__"); +Console.WriteLine(result); \ No newline at end of file diff --git a/cs/Markdown/Token/ParsingContext.cs b/cs/Markdown/Token/ParsingContext.cs new file mode 100644 index 000000000..d0f5f8d83 --- /dev/null +++ b/cs/Markdown/Token/ParsingContext.cs @@ -0,0 +1,19 @@ +using Markdown.Parser.TokenHandler; + +namespace Markdown.Token; + +public class ParsingContext +{ + public string Text { get; } + public int Position { get; } + public Stack OpenTags { get; } + + public ParsingContext(string text, int position, Stack? openTags) + { + Text = text; + Position = position; + OpenTags = openTags ?? 
new Stack(); + } + + public bool IsStartOfLine => Position == 0 || (Position > 0 && Text[Position - 1] == '\n'); +} \ No newline at end of file diff --git a/cs/Markdown/Token/TagState.cs b/cs/Markdown/Token/TagState.cs new file mode 100644 index 000000000..0b1c26073 --- /dev/null +++ b/cs/Markdown/Token/TagState.cs @@ -0,0 +1,7 @@ +namespace Markdown.Token; + +public enum TagState +{ + Open, + Close +} \ No newline at end of file diff --git a/cs/Markdown/Token/Token.cs b/cs/Markdown/Token/Token.cs new file mode 100644 index 000000000..a5982c5c2 --- /dev/null +++ b/cs/Markdown/Token/Token.cs @@ -0,0 +1,39 @@ +using Markdown.Parser.TokenHandler; + +namespace Markdown.Token; + +public class Token +{ + public string Text { get; } + public TokenType Type { get; } + public TagState State { get; } + public int Position { get; } + public int Level { get; } + public string Url { get; } + + public Token(string text, TokenType type, TagState state, int position, int level = 0, string url = null) + { + Text = text; + Type = type; + State = state; + Position = position; + Level = level; + Url = url; + } + + public static Token CreateText(string text, int position) + => new(text, TokenType.Text, TagState.Open, position); + + public static Token CreateStrong(bool isOpening, int position) + => new("__", TokenType.Strong, isOpening ? TagState.Open : TagState.Close, position); + + public static Token CreateItalic(bool isOpening, int position) + => new("_", TokenType.Italic, isOpening ? 
TagState.Open : TagState.Close, position); + + public static Token CreateHeader(int level, int position) + => new(new string('#', level), TokenType.Header, TagState.Open, position, level); + + public static Token CreateLink(string text, string url, int position) + => new(text, TokenType.Link, TagState.Open, position, url: url); + +} \ No newline at end of file diff --git a/cs/Markdown/Token/TokenType.cs b/cs/Markdown/Token/TokenType.cs new file mode 100644 index 000000000..955cbbe47 --- /dev/null +++ b/cs/Markdown/Token/TokenType.cs @@ -0,0 +1,11 @@ +namespace Markdown.Token; + +public enum TokenType +{ + Text, + Strong, + Italic, + Header, + Link, + Escaped +} \ No newline at end of file diff --git a/cs/clean-code.sln b/cs/clean-code.sln index 2206d54db..769a08bc8 100644 --- a/cs/clean-code.sln +++ b/cs/clean-code.sln @@ -9,6 +9,10 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ControlDigit", "ControlDigi EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Samples", "Samples\Samples.csproj", "{C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Markdown", "Markdown\Markdown.csproj", "{964D393D-8B85-4F4F-A4A6-B96C876BF12C}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MarkDownTest", "MarkDownTest\MarkDownTest.csproj", "{0028163D-10A8-4BDE-BFB3-FF0EFC7C275A}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -27,5 +31,13 @@ Global {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Debug|Any CPU.Build.0 = Debug|Any CPU {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.ActiveCfg = Release|Any CPU {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.Build.0 = Release|Any CPU + {964D393D-8B85-4F4F-A4A6-B96C876BF12C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {964D393D-8B85-4F4F-A4A6-B96C876BF12C}.Debug|Any CPU.Build.0 = Debug|Any CPU + {964D393D-8B85-4F4F-A4A6-B96C876BF12C}.Release|Any CPU.ActiveCfg = Release|Any CPU + 
{964D393D-8B85-4F4F-A4A6-B96C876BF12C}.Release|Any CPU.Build.0 = Release|Any CPU + {0028163D-10A8-4BDE-BFB3-FF0EFC7C275A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {0028163D-10A8-4BDE-BFB3-FF0EFC7C275A}.Debug|Any CPU.Build.0 = Debug|Any CPU + {0028163D-10A8-4BDE-BFB3-FF0EFC7C275A}.Release|Any CPU.ActiveCfg = Release|Any CPU + {0028163D-10A8-4BDE-BFB3-FF0EFC7C275A}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection EndGlobal