From f96cfd9451179a3087f7d9df1ff8b4e66519a82f Mon Sep 17 00:00:00 2001 From: seppiabrilla Date: Thu, 4 Jan 2024 15:28:26 +0100 Subject: [PATCH] changed Parser to use regex instead of cycles --- package-lock.json | 8 ++-- src/Parser.ts | 81 +++++++++++++++++++++++++++++++- src/WebObsidianBuilder.ts | 8 ++-- tests/Parser.test.ts | 45 ++++++------------ tests/WebObsidianBuilder.test.ts | 34 ++++++++++++++ 5 files changed, 135 insertions(+), 41 deletions(-) create mode 100644 tests/WebObsidianBuilder.test.ts diff --git a/package-lock.json b/package-lock.json index b17696a..ef2a4b1 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { - "name": "WebObsidianBuilder", - "version": "0.1.0", + "name": "web-obsidian-builder", + "version": "0.1.2", "lockfileVersion": 2, "requires": true, "packages": { "": { - "name": "WebObsidianBuilder", - "version": "0.1.0", + "name": "web-obsidian-builder", + "version": "0.1.2", "dependencies": { "@types/marked": "^5.0.1", "katex": "^0.16.8", diff --git a/src/Parser.ts b/src/Parser.ts index 20fae45..169a4a1 100644 --- a/src/Parser.ts +++ b/src/Parser.ts @@ -1,6 +1,83 @@ import { MarkdownElement } from './ObsidianElements'; import { Token, MarkdownToken} from './Tokens'; +const REGEX = "([a-zA-Z0-9 \\^\\/,\\.\\*\\!\\@\\#\\%\\^\\&()\\{}_\\-=\\+`~;:'\"<>\\?\\|\\n\\t]+)"; +const CHAR_TO_ESCAPE = ['\\','.','$','*','+','?','(',')','[','{,','|', ']', '-'] + +function addEscape(str:string){ +for (const char of CHAR_TO_ESCAPE){ + str = str.replaceAll(char,`\\${char}`); + } + return str; +} + +function build_regex(open_token:Token, close_token:Token): RegExp{ + let open_token_str: string = Token[open_token]; + let close_token_str: string = Token[close_token]; + const open_tokens_to_remove_before = []; + const open_tokens_to_remove_after = []; + const close_tokens_to_remove_before = []; + const close_tokens_to_remove_after = []; + for(const token of Object.keys(Token)){ + if(isNaN(+token)){ + if(token.length > open_token_str.length){ + let removed_before = false; + if(token.substring(token.length - open_token_str.length, token.length) == open_token_str){ + const tokenToRemove = token.replace(open_token_str, ''); + open_tokens_to_remove_before.push(addEscape(tokenToRemove)); + removed_before = true; + } + if(token.substring(0, open_token_str.length) == open_token_str){ + const tokenToRemove = token.replace(open_token_str, ''); + if(!removed_before && tokenToRemove != open_tokens_to_remove_before[open_tokens_to_remove_before.length - 1]) + open_tokens_to_remove_after.push(addEscape(tokenToRemove)); + } + } + if(token.length > close_token_str.length){ + let removed_after = false; + if(token.substring(0, close_token_str.length) == close_token_str){ + const tokenToRemove = token.replace(close_token_str, ''); + close_tokens_to_remove_after.push(addEscape(tokenToRemove)); + removed_after = true; + } + if(token.substring(token.length - close_token_str.length, token.length) == close_token_str){ + const tokenToRemove = token.replace(close_token_str, ''); + if(!removed_after && tokenToRemove != close_tokens_to_remove_after[close_tokens_to_remove_after.length - 1]) + close_tokens_to_remove_before.push(addEscape(tokenToRemove)); + } + } + } + } + let before = open_tokens_to_remove_before.length > 0 ? `(? 0? `(?!${open_tokens_to_remove_after.join("|")})` : ""; + open_token_str = `${before}${addEscape(open_token_str)}${after}`; + before = close_tokens_to_remove_before.length > 0 ? `(? 0? `(?!${close_tokens_to_remove_after.join("|")})` : ""; + close_token_str = `${before}${addEscape(close_token_str)}${after}`; + return RegExp(`${open_token_str}${REGEX}${close_token_str}`, "g"); +} + +function BuildElements(mdString:string):Array{ + const regexes: Array<{[index: string]: Token | RegExp}> = [ + {'token':Token['![['], 'value':build_regex(Token['![['], Token[']]'])}, + {'token':Token['[['], 'value':build_regex(Token['[['], Token[']]'])}, + {'token':Token.$$, 'value':build_regex(Token['$$'], Token['$$'])}, + {'token':Token.$, 'value':build_regex(Token['$'], Token['$'])}, + {'token':Token['```mermaid'], 'value':build_regex(Token['```mermaid'], Token['```'])}, + {'token':Token['```'], 'value':build_regex(Token['```'], Token['```'])}, + ]; + const elements:Array = []; + for(const val of regexes){ + const token: Token = val['token']; + const regex: RegExp = val['value']; + const regex_results = [...mdString.matchAll(regex)]; + for(const result of regex_results){ + elements.push(new MarkdownElement(result[1], token)); + } + } + return elements; +} + function ToToken(str:string): Token{ for(let i = str.length; i > 0; i --){ @@ -38,7 +115,7 @@ function Tokenize(mdString: string): Array{ const Closer = [Token[']]'], Token.$, Token.$$, Token['```']] -function BuildElements(tokens: Array, mdString:string): Array{ +function old_BuildElements(tokens: Array, mdString:string): Array{ const opened: { [id: string] : MarkdownToken|undefined; } = {}; const elements: Array = []; tokens.sort((a:MarkdownToken, b:MarkdownToken) => { @@ -104,4 +181,4 @@ function BuildElements(tokens: Array, mdString:string): Array { @@ -98,47 +98,32 @@ describe('Tokenize', () => { }); const mdString = '$$math$$ [[link]] ```mermaid m ``` $inline$ $$double math$$ ```py code ``` ![[image]]'; -const tokens = [ - new MarkdownToken( 0, 0), - new MarkdownToken( 0, 6), - new MarkdownToken( 2, 9), - new MarkdownToken( 4, 15), - new MarkdownToken( 5, 18), - new MarkdownToken( 6, 31), - new MarkdownToken( 1, 35), - new MarkdownToken( 1, 42), - new MarkdownToken( 0, 44), - new MarkdownToken( 0, 57), - new MarkdownToken( 6, 60), - new MarkdownToken( 6, 71), - new MarkdownToken( 3, 75), - new MarkdownToken( 4, 83) - ] describe('BuildElements', () => { test('find elements', () =>{ - const elements = BuildElements(tokens, mdString); + const elements = BuildElements(mdString); expect(elements.length).toEqual(7); - expect(elements[0].Value).toEqual("math"); - expect(elements[0].Type).toEqual(Token.$$); + expect(elements[0].Value).toEqual("image"); + expect(elements[0].Type).toEqual(Token['![[']); expect(elements[1].Value).toEqual("link"); expect(elements[1].Type).toEqual(Token['[[']); - expect(elements[2].Value).toEqual(" m "); - expect(elements[2].Type).toEqual(Token['```mermaid']); + expect(elements[2].Value).toEqual("math"); + expect(elements[2].Type).toEqual(Token.$$); - expect(elements[3].Value).toEqual("inline"); - expect(elements[3].Type).toEqual(Token.$); + expect(elements[3].Value).toEqual("double math"); + expect(elements[3].Type).toEqual(Token.$$); - expect(elements[4].Value).toEqual("double math"); - expect(elements[4].Type).toEqual(Token.$$); + expect(elements[4].Value).toEqual("inline"); + expect(elements[4].Type).toEqual(Token.$); - expect(elements[5].Value).toEqual("py code "); - expect(elements[5].Type).toEqual(Token['```']); + expect(elements[5].Value).toEqual(" m "); + expect(elements[5].Type).toEqual(Token['```mermaid']); - expect(elements[6].Value).toEqual("image"); - expect(elements[6].Type).toEqual(Token['![[']); + expect(elements[6].Value).toEqual("py code "); + expect(elements[6].Type).toEqual(Token['```']); }); + }); diff --git a/tests/WebObsidianBuilder.test.ts b/tests/WebObsidianBuilder.test.ts new file mode 100644 index 0000000..924ac2d --- /dev/null +++ b/tests/WebObsidianBuilder.test.ts @@ -0,0 +1,34 @@ +import {ObsidianLink, ObsidianlinkArray} from '../src/Links' +import {WebObsidianBuilder} from '../src/WebObsidianBuilder' + +function remove(str:string):string { + return str.replace(" ",""); +} + + +describe('WebObsidianBuilder', () => { + test('AddAndConvert', () => { + const array = new ObsidianlinkArray([ + new ObsidianLink("name", "link"), + new ObsidianLink("name2", "link2"), + new ObsidianLink("link", "name"), + new ObsidianLink("note", "new_note") + ]) + const builder = new WebObsidianBuilder(array); + const html = builder.AddAndConvert("note", "# Title \n ## Subtitle [[name]], ![[image]] $$e = mc^2$$ $i_{low}$ ```py print('hello')```"); + const graph = builder.GetGraph(); + expect(graph.Nodes.length).toEqual(4); + expect(graph.GetEdgesFrom("note").length).toEqual(1); + expect(graph.GetEdgesFrom("note")[0].To).toEqual("name"); + expect(remove(html)).toEqual(remove(RES)); + }); +}); + + +const RES = `

Title

+

Subtitle name +, image + e=mc2e = mc^2 ilowi_{low} py print('hello')

+`; + +