Skip to content

Commit

Permalink
Flat AST with types
Browse files Browse the repository at this point in the history
  • Loading branch information
brunojppb committed Jun 1, 2024
1 parent 3126602 commit 0819dc2
Show file tree
Hide file tree
Showing 6 changed files with 178 additions and 152 deletions.
49 changes: 35 additions & 14 deletions md_parser/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,47 @@ use serde::{Deserialize, Serialize};

#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
pub enum Node<'s> {
Header(Header<'s>),
Paragraph(Paragraph<'s>),
Link(Link<'s>),
Bold(Bold<'s>),
Italic(Italic<'s>),
Digit(&'s str),
Text(&'s str),
LineBreak,
}

#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct Header<'s> {
pub level: u8,
#[serde(borrow)]
Block(BlockNode<'s>),
Inline(InlineNode<'s>),
pub children: Vec<Node<'s>>,
}

/// Block level elements
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
pub enum BlockNode<'s> {
Heading(u8, Vec<InlineNode<'s>>), // (heading level, elements)
pub struct Paragraph<'s> {
#[serde(borrow)]
Paragraph(Vec<InlineNode<'s>>),
pub children: Vec<Node<'s>>,
}

/// inline level elements
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
pub enum InlineNode<'s> {
Bold(Vec<InlineNode<'s>>),
Italic(Vec<InlineNode<'s>>),
Link(Vec<InlineNode<'s>>, Vec<InlineNode<'s>>), // (elements, url)
Digit(usize),
Text(&'s str),
LineBreak,
pub struct Link<'s> {
#[serde(borrow)]
pub children: Vec<Node<'s>>,
/// List of Text nodes
pub url: Vec<Node<'s>>,
// TODO: Support title for tooltips
// title: Option<&'s str>
}

#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct Bold<'s> {
#[serde(borrow)]
pub children: Vec<Node<'s>>,
}

#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct Italic<'s> {
#[serde(borrow)]
pub children: Vec<Node<'s>>,
}
2 changes: 2 additions & 0 deletions md_parser/src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ impl<'a> Lexer<'a> {
}
}

/// A token can only be within the ASCII space
/// and must belong to our list of reserved symbols
fn is_token(&self, c: Option<u8>) -> bool {
match c {
Some(c) => {
Expand Down
37 changes: 21 additions & 16 deletions md_parser/src/parser.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::ast::{BlockNode, InlineNode, Node};
use crate::ast::{Bold, Header, Link, Node, Paragraph};
use crate::token::{Span, Token};

use std::cmp::max;
Expand Down Expand Up @@ -112,15 +112,15 @@ impl<'source> Parser<'source> {
if heading_level > 0 && heading_level <= 6 && self.match_token(Token::Space) {
let mut inline_elements = Vec::new();
while let Some(inline) = self.inline(Parent::Block) {
if inline == InlineNode::LineBreak {
if inline == Node::LineBreak {
break;
}
inline_elements.push(inline)
}
return Some(Node::Block(BlockNode::Heading(
heading_level,
inline_elements,
)));
return Some(Node::Header(Header {
level: heading_level,
children: inline_elements,
}));
}

// in case of detected hashes, at this point,
Expand Down Expand Up @@ -152,10 +152,12 @@ impl<'source> Parser<'source> {
return None;
}

Some(Node::Block(BlockNode::Paragraph(inline_elements)))
Some(Node::Paragraph(Paragraph {
children: inline_elements,
}))
}

fn inline(&mut self, parent: Parent) -> Option<InlineNode<'source>> {
fn inline(&mut self, parent: Parent) -> Option<Node<'source>> {
if self.is_at_end() {
return None;
}
Expand All @@ -167,7 +169,7 @@ impl<'source> Parser<'source> {
Token::Newline if self.check_next(Token::Newline) => {
return None;
}
Token::Newline => InlineNode::LineBreak,
Token::Newline => Node::LineBreak,
Token::Star => return self.maybe_bold(),
Token::LeftSquareBracket if parent == Parent::Block => return self.maybe_link(),
Token::Text(_)
Expand All @@ -182,7 +184,7 @@ impl<'source> Parser<'source> {
| Token::RightParen
| Token::LeftSquareBracket
| Token::RightSquareBracket
| Token::Backslash => InlineNode::Text(token.literal()),
| Token::Backslash => Node::Text(token.literal()),
t if t.is_block_level_token() => return None,
t => todo!("unhandled token: {}", t),
};
Expand All @@ -193,7 +195,7 @@ impl<'source> Parser<'source> {
None
}

fn maybe_link(&mut self) -> Option<InlineNode<'source>> {
fn maybe_link(&mut self) -> Option<Node<'source>> {
let mut markers: [u8; 4] = [0, 0, 0, 0];
let rewind_position = self.current;
// Any inline element can partially show up and should be represented as text,
Expand Down Expand Up @@ -252,16 +254,19 @@ impl<'source> Parser<'source> {

self.consume(&Token::RightParen);

return Some(InlineNode::Link(link_text, url));
return Some(Node::Link(Link {
children: link_text,
url,
}));
}

// Otherwise we bail, rewind and let the next loop handle
// each token as normal text or other inline elements
self.consume(&Token::LeftSquareBracket);
Some(InlineNode::Text(Token::LeftSquareBracket.literal()))
Some(Node::Text(Token::LeftSquareBracket.literal()))
}

fn maybe_bold(&mut self) -> Option<InlineNode<'source>> {
fn maybe_bold(&mut self) -> Option<Node<'source>> {
let mut markers: [u8; 2] = [0, 0];
let rewind_position = self.current;
'outer: while markers != [1, 1] && !self.is_at_end() {
Expand Down Expand Up @@ -303,14 +308,14 @@ impl<'source> Parser<'source> {
// Consume the wrapping "**" around bold tokens
self.consume(&Token::Star);
self.consume(&Token::Star);
return Some(InlineNode::Bold(inner));
return Some(Node::Bold(Bold { children: inner }));
}

// Otherwise we bail, rewind and let the next loop handle each token
// as normal text or other inline elements
self.rewind(rewind_position);
self.consume(&Token::Star);
Some(InlineNode::Text(Token::Star.literal()))
Some(Node::Text(Token::Star.literal()))
}

fn consume(&mut self, kind: &Token) -> &Token {
Expand Down
52 changes: 25 additions & 27 deletions md_parser/src/renderer.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
use crate::{
ast::{BlockNode, InlineNode, Node},
lexer::Lexer,
parser::Parser,
};
use crate::{ast::Node, lexer::Lexer, parser::Parser};

/// Renders an HTML string from the given AST
///
Expand Down Expand Up @@ -31,60 +27,62 @@ fn render(ast: Vec<Node>) -> String {

fn visit(buffer: &mut String, node: &Node) {
match node {
Node::Block(block) => visit_block(buffer, block),
Node::Inline(inline) => visit_inline(buffer, inline),
Node::Header(_) | Node::Paragraph(_) => visit_block(buffer, node),
node => visit_inline(buffer, node),
}
}

fn visit_block(buffer: &mut String, node: &BlockNode) {
fn visit_block(buffer: &mut String, node: &Node) {
match node {
BlockNode::Heading(level, inline_nodes) => {
buffer.push_str(&format!("<h{}>", level));
visit_inline_nodes(buffer, inline_nodes);
buffer.push_str(&format!("</h{}>", level));
Node::Header(header) => {
buffer.push_str(&format!("<h{}>", header.level));
visit_inline_nodes(buffer, &header.children);
buffer.push_str(&format!("</h{}>", header.level));
}
BlockNode::Paragraph(inline_nodes) => {
Node::Paragraph(paragraph) => {
buffer.push_str("<p>");
for (idx, node) in inline_nodes.iter().enumerate() {
for (idx, node) in paragraph.children.iter().enumerate() {
// Within a paragraph, whenever we hit the last node
// and it's a newline, we can just discard it as the
// paragraph element itself behaves as a block.
if idx >= inline_nodes.len() - 1 && node == &InlineNode::LineBreak {
if idx >= paragraph.children.len() - 1 && node == &Node::LineBreak {
continue;
}
visit_inline(buffer, node);
}
buffer.push_str("</p>");
}
_ => panic!("Node {:#?} not supported as a block node type", node),
}
}

fn visit_inline(buffer: &mut String, node: &InlineNode) {
fn visit_inline(buffer: &mut String, node: &Node) {
match node {
InlineNode::Text(txt) => buffer.push_str(txt),
InlineNode::Bold(inline_nodes) => {
Node::Text(txt) => buffer.push_str(txt),
Node::Bold(bold) => {
buffer.push_str("<strong>");
visit_inline_nodes(buffer, inline_nodes);
visit_inline_nodes(buffer, &bold.children);
buffer.push_str("</strong>");
}
InlineNode::Digit(d) => buffer.push_str(&d.to_string()),
InlineNode::LineBreak => buffer.push_str("<br>"),
InlineNode::Italic(inline_nodes) => {
Node::Digit(d) => buffer.push_str(d),
Node::LineBreak => buffer.push_str("<br>"),
Node::Italic(italic) => {
buffer.push_str("<em>");
visit_inline_nodes(buffer, inline_nodes);
visit_inline_nodes(buffer, &italic.children);
buffer.push_str("</em>");
}
InlineNode::Link(text_nodes, link_nodes) => {
Node::Link(link) => {
buffer.push_str(r#"<a href=""#);
visit_inline_nodes(buffer, link_nodes);
visit_inline_nodes(buffer, &link.url);
buffer.push_str(r#"">"#);
visit_inline_nodes(buffer, text_nodes);
visit_inline_nodes(buffer, &link.children);
buffer.push_str("</a>");
}
_ => panic!("Node {:#?} not supported as a inline node type", node),
}
}

fn visit_inline_nodes(buffer: &mut String, nodes: &[InlineNode]) {
fn visit_inline_nodes(buffer: &mut String, nodes: &[Node]) {
for inline in nodes.iter() {
visit_inline(buffer, inline);
}
Expand Down
Loading

0 comments on commit 0819dc2

Please sign in to comment.