Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Косторной Дмитрий #238

Open
wants to merge 16 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions cs/Markdown/Markdown.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

</Project>
20 changes: 20 additions & 0 deletions cs/Markdown/Md.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
using Markdown.Parsers;
using Markdown.Renderers;

namespace Markdown;

public class Md
{
private readonly HtmlRenderer renderer;

public Md()
{
renderer = new HtmlRenderer();
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

HtmlRenderer захардкожен: что, если понадобится отрисовать XML?
Предлагаю выделить интерфейс отрисовщика и передавать реализацию в конструктор.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

То же самое про MarkdownParser:
реализация прибита, переиспользовать не получится.

}

/// <summary>
/// Converts Markdown text to its rendered form by parsing it into tokens and
/// passing those tokens to the configured renderer.
/// </summary>
/// <param name="markdownText">The Markdown source text.</param>
/// <returns>The string produced by the renderer for the parsed tokens.</returns>
public string Render(string markdownText) =>
    renderer.Render(MarkdownParser.ParseTokens(markdownText));
}
211 changes: 211 additions & 0 deletions cs/Markdown/Parsers/MarkdownParser.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
using Markdown.Tokens;

namespace Markdown.Parsers;

public abstract class MarkdownParser
Copy link

@shiyois shiyois Nov 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Огромный, трудночитаемый и плохо расширяемый класс
Реализация не удовлетворяет требованию на полное решение: Решение разбито на составные части, каждая из которых легко читается

Copy link

@shiyois shiyois Nov 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Можно попробовать начать с выделения отдельных классов, ответственных за обработку определённого типа токенов.

{
public static IEnumerable<Token> ParseTokens(string markdownText)
{
if (markdownText == null)
throw new ArgumentNullException(nameof(markdownText));

var context = new MarkdownParseContext
{
MarkdownText = markdownText
};

while (context.CurrentIndex < context.MarkdownText.Length)
{
var current = context.MarkdownText[context.CurrentIndex];
var next = context.CurrentIndex + 1 < context.MarkdownText.Length ?
context.MarkdownText[context.CurrentIndex + 1] : '\0';

switch (current)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Switch уже выглядит громоздким, а что, если понадобится добавить новые токены? При добавлении нового типа токена придётся вручную добавлять сюда его обработку.
Предлагаю завести список обработчиков токенов. Итеративно пытаемся распарсить токен с помощью разных обработчиков; если получилось, то сохраняем токен.

{
case '\\':
HandleEscapeCharacter(next, context);
break;
case '_':
if (next == '_')
HandleStrongToken(context);
else
HandleEmphasisToken(context);
break;
case '#' when (context.CurrentIndex == 0 ||
context.MarkdownText[context.CurrentIndex - 1] == '\n') && next == ' ':
context.HeaderLevel = HandleHeaderToken(context);
break;
case '\n' when context.Stack.Count > 0 && context.Stack.Peek().Type == TokenType.Header:
HandleNewLine(context);
break;
default:
context.Buffer.Append(current);
context.CurrentIndex++;
break;
}

if (context.CurrentIndex != context.MarkdownText.Length ||
context.Stack.Count <= 0 || context.Stack.Peek().Type != TokenType.Header) continue;
AddToken(context, TokenType.Text);
context.Tokens.Add(context.Stack.Pop());
}
AddToken(context, TokenType.Text);
return context.Tokens;
}

/// <summary>
/// Handles a backslash found at the current position.
/// When it precedes an escapable character ('_', '#' or '\'), the backslash is dropped
/// and the escaped character is written to the buffer as literal text. Otherwise the
/// backslash itself is kept as literal text.
/// </summary>
/// <param name="next">The character following the backslash, or '\0' at the end of the text.</param>
/// <param name="context">Parse state; its buffer and current index are updated.</param>
private static void HandleEscapeCharacter(char next, MarkdownParseContext context)
{
    if (next is '_' or '#' or '\\')
    {
        // The escaping backslash disappears and the escaped character stays.
        // This also applies to an escaped backslash: previously both backslashes
        // were consumed and nothing was emitted, so the literal backslash was lost.
        context.Buffer.Append(next);
        context.CurrentIndex += 2;
        return;
    }

    // The backslash escapes nothing, so it remains part of the text.
    context.Buffer.Append('\\');
    context.CurrentIndex++;
}

/// <summary>
/// Processes a double-underscore marker at the current position: it either acts as a
/// strong-tag boundary or is copied to the buffer as literal text.
/// </summary>
/// <param name="context">Parse state; its buffer, stack, tokens and current index may be updated.</param>
private static void HandleStrongToken(MarkdownParseContext context)
{
    const string marker = "__";

    if (!IsValidBoundary(context, marker))
        context.Buffer.Append(marker);
    else
        HandleTokenBoundary(context, TokenType.Strong);

    // Either way both underscores have been consumed.
    context.CurrentIndex += 2;
}

/// <summary>
/// Processes a single-underscore marker at the current position: it either acts as an
/// emphasis-tag boundary or is copied to the buffer as literal text.
/// </summary>
/// <param name="context">Parse state; its buffer, stack, tokens and current index may be updated.</param>
private static void HandleEmphasisToken(MarkdownParseContext context)
{
    if (!IsValidBoundary(context, "_"))
        context.Buffer.Append('_');
    else
        HandleTokenBoundary(context, TokenType.Emphasis);

    // Either way the underscore has been consumed.
    context.CurrentIndex += 1;
}

/// <summary>
/// Consumes the run of '#' characters at the current position. When a space follows the
/// run, a header token is added to the token list and the rest of the line is parsed
/// into its children; otherwise the consumed '#' characters are kept as literal text.
/// </summary>
/// <param name="context">Parse state; its index, buffer, token list and header level are updated.</param>
/// <returns>The number of '#' characters consumed by this call.</returns>
private static int HandleHeaderToken(MarkdownParseContext context)
{
    var text = context.MarkdownText;

    // Count from zero on every call. The level used to be accumulated directly in
    // context.HeaderLevel, which is never reset, so every further header in the same
    // text came out one level deeper than the previous one.
    var level = 0;
    while (context.CurrentIndex < text.Length && text[context.CurrentIndex] == '#')
    {
        level++;
        context.CurrentIndex++;
    }

    context.HeaderLevel = level;

    var spaceFollows = context.CurrentIndex < text.Length && text[context.CurrentIndex] == ' ';
    if (!spaceFollows)
    {
        // Not a header after all: the consumed markers stay in the output as text.
        context.Buffer.Append('#', level);
        return level;
    }

    // Skip the space that separates the markers from the header content.
    context.CurrentIndex++;

    // Flush any buffered text before the header token is added.
    AddToken(context, TokenType.Text);

    var headerToken = new Token(TokenType.Header)
    {
        HeaderLevel = level
    };
    context.Tokens.Add(headerToken);

    // The header content runs up to the next line break or the end of the text.
    var headerEnd = text.IndexOf('\n', context.CurrentIndex);
    if (headerEnd == -1)
        headerEnd = text.Length;

    // The content is parsed on its own and attached as the header's children.
    headerToken.Children.AddRange(ParseTokens(text[context.CurrentIndex..headerEnd]));

    // Continue from the line break itself (or from the end of the text).
    context.CurrentIndex = headerEnd;
    return level;
}

/// <summary>
/// Handles a line break: flushes the buffered text, moves the token on top of the
/// stack to the token list and steps past the line break.
/// </summary>
/// <param name="context">Parse state; its buffer, stack, token list and current index are updated.</param>
private static void HandleNewLine(MarkdownParseContext context)
{
    AddToken(context, TokenType.Text);

    var finished = context.Stack.Pop();
    context.Tokens.Add(finished);

    context.CurrentIndex += 1;
}

/// <summary>
/// Reacts to a tag marker of the given type. If the token on top of the stack has the
/// same type, that token is closed and attached to its parent (or to the token list
/// when the stack becomes empty); otherwise a new token of that type is opened.
/// </summary>
/// <param name="context">Parse state; its buffer, stack and token list are updated.</param>
/// <param name="type">The token type the marker stands for.</param>
private static void HandleTokenBoundary(MarkdownParseContext context, TokenType type)
{
    // Text collected so far belongs to whatever is currently open.
    AddToken(context, TokenType.Text);

    var open = context.Stack;
    var closesTopToken = open.Count > 0 && open.Peek().Type == type;
    if (!closesTopToken)
    {
        open.Push(new Token(type));
        return;
    }

    var closed = open.Pop();
    if (closed.Children.Count > 0)
        closed.Content = string.Empty;
    context.Buffer.Clear();

    if (open.Count == 0)
        context.Tokens.Add(closed);
    else
        open.Peek().Children.Add(closed);
}

/// <summary>
/// Turns the buffered text into a token of the given type and clears the buffer.
/// The token becomes a child of the token on top of the stack, or is added to the
/// token list when the stack is empty. Does nothing when the buffer is empty.
/// </summary>
/// <param name="context">Parse state; its buffer and either the stack top or the token list are updated.</param>
/// <param name="type">The type of the token to create.</param>
private static void AddToken(MarkdownParseContext context, TokenType type)
{
    var pending = context.Buffer;
    if (pending.Length == 0)
        return;

    var created = new Token(type, pending.ToString());
    pending.Clear();

    var destination = context.Stack.Count > 0
        ? context.Stack.Peek().Children
        : context.Tokens;
    destination.Add(created);
}

/// <summary>
/// Decides whether the delimiter at <c>context.CurrentIndex</c> should be treated as a
/// tag boundary (<c>true</c>) or left as literal text (<c>false</c>).
/// </summary>
/// <param name="context">Parse state; it is only read here (text, index, stack and buffer).</param>
/// <param name="delimiter">The marker being examined; the callers in this file pass "_" or "__".</param>
/// <returns><c>true</c> when the delimiter is accepted as a boundary, otherwise <c>false</c>.</returns>
private static bool IsValidBoundary(MarkdownParseContext context, string delimiter)
{
var index = context.CurrentIndex;
var text = context.MarkdownText;
// Branch 1: some token is already on the stack.
if (context.Stack.Count > 0)
{
// Reject while the buffer holds no text.
if (context.Buffer.Length == 0)
return false;
// Accept at the first or last character of the text; this also keeps the
// index - 1 / index + 1 lookups below inside the string.
if (index == 0 || index == text.Length - 1)
return true;
// Accept unless the characters at index - 1 and index + 1 are both letters or digits.
// NOTE(review): for "__" the character at index + 1 appears to be the delimiter's own
// second underscore, which would make this test always pass for that delimiter —
// confirm whether index + delimiter.Length was intended.
return !char.IsLetterOrDigit(text[index - 1]) ||
!char.IsLetterOrDigit(context.MarkdownText[index + 1]);
}

// Branch 2: the stack is empty. Look for the next occurrence of the same delimiter
// after this one; without it the delimiter is rejected.
var closingIndex = text.IndexOf(delimiter, index + delimiter.Length, StringComparison.Ordinal);
if (closingIndex == -1)
return false;

// Reject when a letter or digit directly precedes this delimiter or directly follows
// the occurrence found above.
var isInsideWord = (index > 0 && char.IsLetterOrDigit(text[index - 1])) ||
(closingIndex + delimiter.Length < text.Length &&
char.IsLetterOrDigit(text[closingIndex + delimiter.Length]));
if (isInsideWord)
return false;

// Reject when the found occurrence starts right after this delimiter (nothing in between).
if (closingIndex - index <= delimiter.Length)
return false;
// NOTE(review): given the check above, this comparison looks like it is always true
// at this point — confirm whether it can be removed.
return index + 1 != closingIndex;
}
}
13 changes: 13 additions & 0 deletions cs/Markdown/Parsers/MarkdownParserContext.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
using Markdown.Tokens;
using System.Text;
namespace Markdown.Parsers;

/// <summary>
/// Mutable state shared by the parsing routines while a single text is processed.
/// </summary>
public class MarkdownParseContext
{
    /// <summary>The text being parsed. Defaults to an empty string.</summary>
    public string MarkdownText { get; set; } = string.Empty;

    /// <summary>Index into <see cref="MarkdownText"/> of the character being processed.</summary>
    public int CurrentIndex { get; set; }

    /// <summary>Header level value kept between parsing steps.</summary>
    public int HeaderLevel { get; set; }

    /// <summary>Text collected so far that has not yet been turned into a token.</summary>
    public StringBuilder Buffer { get; set; } = new StringBuilder();

    /// <summary>Tokens that have been opened but not yet completed.</summary>
    public Stack<Token> Stack { get; set; } = new Stack<Token>();

    /// <summary>Top-level tokens produced so far.</summary>
    public List<Token> Tokens { get; set; } = new List<Token>();
}
59 changes: 59 additions & 0 deletions cs/Markdown/Renderers/HtmlRenderer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
using System.Text;
using Markdown.Tokens;

namespace Markdown.Renderers;

public class HtmlRenderer
{
/// <summary>
/// Renders the given tokens, in order, into a single string.
/// </summary>
/// <param name="tokens">The tokens to render.</param>
/// <returns>The concatenated output of all rendered tokens.</returns>
public string Render(IEnumerable<Token> tokens)
{
    var html = new StringBuilder();

    foreach (var item in tokens)
        RenderToken(item, html);

    return html.ToString();
}

private void RenderToken(Token token, StringBuilder result)
Copy link

@shiyois shiyois Nov 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Можно воспользоваться фабричным методом: на каждый тип возвращать соответствующий ITokenConverter.
Избавимся от громоздкого switch и сможем переиспользовать это при добавлении нового Renderer-а в будущем.

{
switch (token.Type)
{
case TokenType.Text:
result.Append(token.Content);
break;
case TokenType.Emphasis:
result.Append("<em>");
RenderChildren(token, result);
result.Append("</em>");
break;
case TokenType.Strong:
result.Append("<strong>");
RenderChildren(token, result);
result.Append("</strong>");
break;
case TokenType.Header:
var level = token.HeaderLevel;
result.Append($"<h{level}>");
RenderChildren(token, result);
result.Append($"</h{level}>");
break;
default:
result.Append(token.Content);
break;
}
}

/// <summary>
/// Writes the inner part of a token: each of its children in order, or the token's
/// own content when it has no children.
/// </summary>
/// <param name="token">The token whose inner part is rendered.</param>
/// <param name="result">The builder that receives the output.</param>
private void RenderChildren(Token token, StringBuilder result)
{
    var children = token.Children;
    if (children.Count == 0)
    {
        result.Append(token.Content);
        return;
    }

    foreach (var nested in children)
        RenderToken(nested, result);
}
}
23 changes: 23 additions & 0 deletions cs/Markdown/Tokens/Token.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
namespace Markdown.Tokens;

/// <summary>
/// A node of the parsed document: a type, optional text content, child tokens and a
/// header level.
/// </summary>
public class Token
{
    /// <summary>
    /// Creates a token with empty content and no children.
    /// <see cref="HeaderLevel"/> keeps its default value of 0 unless set by an initializer.
    /// </summary>
    /// <param name="type">The kind of token.</param>
    public Token(TokenType type)
    {
        Type = type;
        Children = new List<Token>();
        Content = string.Empty;
    }

    /// <summary>
    /// Creates a token with the given content and sets <see cref="HeaderLevel"/> to 1.
    /// </summary>
    /// <param name="type">The kind of token.</param>
    /// <param name="content">The text carried by the token.</param>
    /// <param name="children">Child tokens; a new empty list is used when null.</param>
    public Token(TokenType type, string content, List<Token>? children = null)
    {
        Type = type;
        Content = content;
        Children = children is null ? new List<Token>() : children;
        HeaderLevel = 1;
    }

    /// <summary>The kind of token.</summary>
    public TokenType Type { get; }

    /// <summary>The text carried by the token.</summary>
    public string Content { get; set; }

    /// <summary>Tokens nested inside this one.</summary>
    public List<Token> Children { get; }

    /// <summary>The header level; can only be set during construction or initialization.</summary>
    public int HeaderLevel { get; init; }
}
9 changes: 9 additions & 0 deletions cs/Markdown/Tokens/TokenType.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
namespace Markdown.Tokens;

/// <summary>
/// The kinds of tokens the parser produces.
/// </summary>
public enum TokenType
{
// Plain text.
Text,
// Emphasised span, delimited by single underscores.
Emphasis,
// Strong span, delimited by double underscores.
Strong,
// Header line, introduced by '#' followed by a space.
Header
}
Loading