Skip to content

Commit

Permalink
Merge pull request #2 from SeppiaBrilla/regex-support
Browse files Browse the repository at this point in the history
Parsing through regex
  • Loading branch information
SeppiaBrilla authored Jan 4, 2024
2 parents 1befd68 + 37fef6d commit 9225feb
Show file tree
Hide file tree
Showing 4 changed files with 131 additions and 37 deletions.
81 changes: 79 additions & 2 deletions src/Parser.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,83 @@
import { MarkdownElement } from './ObsidianElements';
import { Token, MarkdownToken} from './Tokens';

// Source of the capture group that build_regex places between the opening and the
// closing token pattern. It matches one or more characters drawn from ASCII letters,
// digits, space, newline, tab and the punctuation listed in the class.
// `[`, `]`, `$` and the backslash are NOT part of the class, so content containing
// any of those characters is not matched.
const REGEX = "([a-zA-Z0-9 \\^\\/,\\.\\*\\!\\@\\#\\%\\^\\&()\\{}_\\-=\\+`~;:'\"<>\\?\\|\\n\\t]+)";
/**
 * Characters that addEscape prefixes with a backslash before text is spliced
 * into a regular expression.
 *
 * The backslash MUST stay first: addEscape walks this list in order, and
 * escaping the backslash later would double the backslashes inserted for the
 * characters handled before it.
 */
const CHAR_TO_ESCAPE = ['\\', '.', '$', '*', '+', '?', '(', ')', '[', '{', '|', ']', '-', '}', '^'];

/**
 * Escapes every character listed in CHAR_TO_ESCAPE so that `str` is matched
 * literally when embedded in a RegExp source.
 *
 * @param str - raw text (for example a token such as `[[` or `$$`).
 * @returns `str` with a backslash inserted before each listed character.
 */
function addEscape(str: string): string {
  // split/join replaces every occurrence and, unlike a replacement string,
  // gives `$` in the inserted text no special meaning.
  return CHAR_TO_ESCAPE.reduce(
    (escaped, char) => escaped.split(char).join(`\\${char}`),
    str,
  );
}

/**
 * Builds the global regular expression that matches
 * `<open token><content><close token>` and captures the content in group 1.
 *
 * Some names in the `Token` enum contain another token as a prefix or suffix
 * (for example `![[` ends with `[[`). To keep the shorter token from matching
 * inside a longer one, the open and close patterns are wrapped in negative
 * lookbehind / lookahead assertions built from the remainder of every longer
 * token name that ends or starts with them.
 *
 * @param open_token - enum member whose name is the opening delimiter.
 * @param close_token - enum member whose name is the closing delimiter.
 * @returns a RegExp with the `g` flag; group 1 is the text between the delimiters.
 */
function build_regex(open_token: Token, close_token: Token): RegExp {
  const open_token_str: string = Token[open_token];
  const close_token_str: string = Token[close_token];
  const open_tokens_to_remove_before: string[] = [];
  const open_tokens_to_remove_after: string[] = [];
  const close_tokens_to_remove_before: string[] = [];
  const close_tokens_to_remove_after: string[] = [];

  for (const token of Object.keys(Token)) {
    // Keys that parse as numbers are skipped; only the token names are inspected.
    if (!isNaN(+token)) {
      continue;
    }

    if (token.length > open_token_str.length) {
      if (token.endsWith(open_token_str)) {
        // Slice by position: String.replace would drop the FIRST occurrence of
        // the open token, which is not necessarily the trailing one.
        const prefix = token.slice(0, token.length - open_token_str.length);
        open_tokens_to_remove_before.push(addEscape(prefix));
      } else if (token.startsWith(open_token_str)) {
        // Only reached when the token does not also end with the open token.
        const suffix = token.slice(open_token_str.length);
        // NOTE(review): this compares the raw suffix with an already escaped
        // entry, exactly as the previous implementation did; kept as-is.
        if (suffix !== open_tokens_to_remove_before[open_tokens_to_remove_before.length - 1]) {
          open_tokens_to_remove_after.push(addEscape(suffix));
        }
      }
    }

    if (token.length > close_token_str.length) {
      if (token.startsWith(close_token_str)) {
        const suffix = token.slice(close_token_str.length);
        close_tokens_to_remove_after.push(addEscape(suffix));
      } else if (token.endsWith(close_token_str)) {
        // Only reached when the token does not also start with the close token.
        const prefix = token.slice(0, token.length - close_token_str.length);
        // NOTE(review): raw-vs-escaped comparison preserved from the previous implementation.
        if (prefix !== close_tokens_to_remove_after[close_tokens_to_remove_after.length - 1]) {
          close_tokens_to_remove_before.push(addEscape(prefix));
        }
      }
    }
  }

  // An empty alternative list yields no assertion at all.
  const notPrecededBy = (alternatives: string[]): string =>
    alternatives.length > 0 ? `(?<!${alternatives.join("|")})` : "";
  const notFollowedBy = (alternatives: string[]): string =>
    alternatives.length > 0 ? `(?!${alternatives.join("|")})` : "";

  const open_pattern =
    notPrecededBy(open_tokens_to_remove_before) +
    addEscape(open_token_str) +
    notFollowedBy(open_tokens_to_remove_after);
  const close_pattern =
    notPrecededBy(close_tokens_to_remove_before) +
    addEscape(close_token_str) +
    notFollowedBy(close_tokens_to_remove_after);

  return new RegExp(`${open_pattern}${REGEX}${close_pattern}`, "g");
}

/**
 * Extracts the supported elements from a markdown string using one regular
 * expression per element kind.
 *
 * The kinds are scanned in the fixed order listed below, so the result is
 * grouped by kind in that order (not sorted by position in the text); within
 * one kind the elements follow match order.
 *
 * @param mdString - text to scan.
 * @returns one MarkdownElement per match, built from the captured content
 *          (group 1) and the token identifying the kind.
 */
function BuildElements(mdString: string): Array<MarkdownElement> {
  // [token reported for the element, pattern that finds it]
  const patterns: Array<[Token, RegExp]> = [
    [Token['![['], build_regex(Token['![['], Token[']]'])],
    [Token['[['], build_regex(Token['[['], Token[']]'])],
    [Token.$$, build_regex(Token['$$'], Token['$$'])],
    [Token.$, build_regex(Token['$'], Token['$'])],
    [Token['```mermaid'], build_regex(Token['```mermaid'], Token['```'])],
    [Token['```'], build_regex(Token['```'], Token['```'])],
  ];

  return patterns.flatMap(([kind, pattern]) =>
    [...mdString.matchAll(pattern)].map((match) => new MarkdownElement(match[1], kind)),
  );
}

function ToToken(str:string): Token{

for(let i = str.length; i > 0; i --){
Expand Down Expand Up @@ -38,7 +115,7 @@ function Tokenize(mdString: string): Array<MarkdownToken>{
// Holds the tokens `]]`, `$`, `$$` and ```. Presumably these are the tokens treated as
// closing delimiters (suggested by the name only; the usage is outside this view — TODO confirm).
const Closer = [Token[']]'], Token.$, Token.$$, Token['```']]


function BuildElements(tokens: Array<MarkdownToken>, mdString:string): Array<MarkdownElement>{
function old_BuildElements(tokens: Array<MarkdownToken>, mdString:string): Array<MarkdownElement>{
const opened: { [id: string] : MarkdownToken|undefined; } = {};
const elements: Array<MarkdownElement> = [];
tokens.sort((a:MarkdownToken, b:MarkdownToken) => {
Expand Down Expand Up @@ -104,4 +181,4 @@ function BuildElements(tokens: Array<MarkdownToken>, mdString:string): Array<Mar
return elements;
}

export { Tokenize, BuildElements };
export { Tokenize, old_BuildElements, BuildElements, build_regex };
8 changes: 3 additions & 5 deletions src/WebObsidianBuilder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { ObsidianlinkArray } from './Links';
import { Graph } from './Graph';
import { MarkdownElement, MathElement, Element, LinkElement, MermaidElement, VisualLinkElement, MathClass} from './ObsidianElements';
import { Token } from './Tokens';
import { Tokenize, BuildElements } from './Parser';
import { BuildElements } from './Parser';
import { randomUUID } from "crypto";
import { marked } from 'marked';

Expand Down Expand Up @@ -35,16 +35,14 @@ class WebObsidianBuilder{

AddAndConvert(noteName:string, noteText:string){
const from = this.NoteNames.indexOf(noteName);
const tokens = Tokenize(noteText);
const elements = BuildElements(tokens, noteText);
const elements = BuildElements(noteText);

noteText = this.RemoveElementAndConvert(noteText, elements, from);
return this.Rebuild(noteText);
}

Convert(noteText:string){
const tokens = Tokenize(noteText);
const elements = BuildElements(tokens, noteText);
const elements = BuildElements(noteText);

noteText = this.RemoveElementAndConvert(noteText, elements);
return this.Rebuild(noteText);
Expand Down
45 changes: 15 additions & 30 deletions tests/Parser.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { Tokenize, BuildElements } from '../src/Parser';
import { Tokenize, BuildElements} from '../src/Parser';
import { MarkdownToken, Token } from '../src/Tokens';

describe('Tokenize', () => {
Expand Down Expand Up @@ -98,47 +98,32 @@ describe('Tokenize', () => {
});

const mdString = '$$math$$ [[link]] ```mermaid m ``` $inline$ $$double math$$ ```py code ``` ![[image]]';
const tokens = [
new MarkdownToken( 0, 0),
new MarkdownToken( 0, 6),
new MarkdownToken( 2, 9),
new MarkdownToken( 4, 15),
new MarkdownToken( 5, 18),
new MarkdownToken( 6, 31),
new MarkdownToken( 1, 35),
new MarkdownToken( 1, 42),
new MarkdownToken( 0, 44),
new MarkdownToken( 0, 57),
new MarkdownToken( 6, 60),
new MarkdownToken( 6, 71),
new MarkdownToken( 3, 75),
new MarkdownToken( 4, 83)
]

describe('BuildElements', () => {
test('find elements', () =>{
const elements = BuildElements(tokens, mdString);
const elements = BuildElements(mdString);

expect(elements.length).toEqual(7);
expect(elements[0].Value).toEqual("math");
expect(elements[0].Type).toEqual(Token.$$);
expect(elements[0].Value).toEqual("image");
expect(elements[0].Type).toEqual(Token['![[']);

expect(elements[1].Value).toEqual("link");
expect(elements[1].Type).toEqual(Token['[[']);

expect(elements[2].Value).toEqual(" m ");
expect(elements[2].Type).toEqual(Token['```mermaid']);
expect(elements[2].Value).toEqual("math");
expect(elements[2].Type).toEqual(Token.$$);

expect(elements[3].Value).toEqual("inline");
expect(elements[3].Type).toEqual(Token.$);
expect(elements[3].Value).toEqual("double math");
expect(elements[3].Type).toEqual(Token.$$);

expect(elements[4].Value).toEqual("double math");
expect(elements[4].Type).toEqual(Token.$$);
expect(elements[4].Value).toEqual("inline");
expect(elements[4].Type).toEqual(Token.$);

expect(elements[5].Value).toEqual("py code ");
expect(elements[5].Type).toEqual(Token['```']);
expect(elements[5].Value).toEqual(" m ");
expect(elements[5].Type).toEqual(Token['```mermaid']);

expect(elements[6].Value).toEqual("image");
expect(elements[6].Type).toEqual(Token['![[']);
expect(elements[6].Value).toEqual("py code ");
expect(elements[6].Type).toEqual(Token['```']);
});

});
34 changes: 34 additions & 0 deletions tests/WebObsidianBuilder.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import {ObsidianLink, ObsidianlinkArray} from '../src/Links'
import {WebObsidianBuilder} from '../src/WebObsidianBuilder'

/**
 * Strips every space character from `str` so two strings can be compared
 * without regard to spacing.
 *
 * @param str - text to normalise.
 * @returns `str` with all `" "` characters removed.
 */
function remove(str: string): string {
  // A string pattern would replace only the first space; the global regex removes all of them.
  return str.replace(/ /g, "");
}


describe('WebObsidianBuilder', () => {
  test('AddAndConvert', () => {
    // Note text exercising a heading, a link, an image embed, display math,
    // inline math and a code span.
    const noteBody = "# Title \n ## Subtitle [[name]], ![[image]] $$e = mc^2$$ $i_{low}$ ```py print('hello')```";

    // The builder is seeded with four links.
    const knownLinks = new ObsidianlinkArray([
      new ObsidianLink("name", "link"),
      new ObsidianLink("name2", "link2"),
      new ObsidianLink("link", "name"),
      new ObsidianLink("note", "new_note"),
    ]);
    const builder = new WebObsidianBuilder(knownLinks);

    const renderedHtml = builder.AddAndConvert("note", noteBody);
    const noteGraph = builder.GetGraph();

    // Four nodes are expected, and exactly one edge leaving "note", pointing to "name".
    expect(noteGraph.Nodes.length).toEqual(4);
    expect(noteGraph.GetEdgesFrom("note").length).toEqual(1);
    expect(noteGraph.GetEdgesFrom("note")[0].To).toEqual("name");

    // The HTML is compared after both sides go through remove().
    expect(remove(renderedHtml)).toEqual(remove(RES));
  });
});


// Expected HTML for the 'AddAndConvert' test: the output of
// builder.AddAndConvert("note", ...) is compared against this string after both
// sides have been passed through remove().
const RES = `<h1>Title</h1>
<h2>Subtitle <a href="link">name</a>
, <img src="image" alt="image">
<span class="katex-display"><span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block"><semantics><mrow><mi>e</mi><mo>=</mo><mi>m</mi><msup><mi>c</mi><mn>2</mn></msup></mrow><annotation encoding="application/x-tex">e = mc^2</annotation></semantics></math></span><span class="katex-html ObsidianMath" aria-hidden="true"><span class="base"><span class="strut" style="height:0.4306em;"></span><span class="mord mathnormal">e</span><span class="mspace" style="margin-right:0.2778em;"></span><span class="mrel">=</span><span class="mspace" style="margin-right:0.2778em;"></span></span><span class="base"><span class="strut" style="height:0.8641em;"></span><span class="mord mathnormal">m</span><span class="mord"><span class="mord mathnormal">c</span><span class="msupsub"><span class="vlist-t"><span class="vlist-r"><span class="vlist" style="height:0.8641em;"><span style="top:-3.113em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight">2</span></span></span></span></span></span></span></span></span></span></span></span> <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><msub><mi>i</mi><mrow><mi>l</mi><mi>o</mi><mi>w</mi></mrow></msub></mrow><annotation encoding="application/x-tex">i_{low}</annotation></semantics></math></span><span class="katex-html ObsidianMath" aria-hidden="true"><span class="base"><span class="strut" style="height:0.8095em;vertical-align:-0.15em;"></span><span class="mord"><span class="mord mathnormal">i</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.3361em;"><span style="top:-2.55em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mord mathnormal mtight" 
style="margin-right:0.01968em;">l</span><span class="mord mathnormal mtight">o</span><span class="mord mathnormal mtight" style="margin-right:0.02691em;">w</span></span></span></span></span><span class="vlist-s">​</span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span></span></span></span> <code>py print(&#39;hello&#39;)</code></h2>
`;


0 comments on commit 9225feb

Please sign in to comment.