Skip to content

Commit

Permalink
Use open inline parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
zampino committed Apr 10, 2024
1 parent 567ca17 commit adee38d
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 51 deletions.
3 changes: 3 additions & 0 deletions deps.edn
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@
:git/sha "e8f275b5cf077ec9441e404c1885ff0b6ee0aef9"
:deps/root "render"}}}

:commonmark-java-local
{:extra-paths ["../commonmark-java/commonmark/target/classes"]}

:build
{:ns-default build
:deps {io.github.clojure/tools.build {:git/tag "v0.6.1" :git/sha "515b334"}
Expand Down
59 changes: 35 additions & 24 deletions notebooks/benchmarks.clj
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
[nextjournal.clerk :as clerk]
[nextjournal.clerk.eval :as clerk.eval]
[nextjournal.markdown :as md]
[nextjournal.markdown.parser2 :as parser2]
parsing-extensibility
[nextjournal.markdown.parser :as md.parser]))

Expand All @@ -22,30 +23,40 @@
(md.parser/parse (update md.parser/empty-doc :text-tokenizers concat extra-tokenizers)
(md/tokenize text))))

;; Baseline timing: parse the reference text without passing any extra tokenizers.
(time-ms (parse reference-text))

;; One extra regex tokenizer: `{{name}}` is captured and emitted as a `:var` node
;; whose `:text` is the first capture group.
(time-ms (parse [{:regex #"\{\{([^\{]+)\}\}"
:handler (fn [m] {:type :var :text (m 1)})}]
reference-text))

;; The brace-brace tokenizer plus the function-based "losange" tokenizer taken from
;; the `parsing-extensibility` namespace.
(time-ms (parse [{:regex #"\{\{([^\{]+)\}\}"
:handler (fn [m] {:type :var :text (m 1)})}
{:tokenizer-fn parsing-extensibility/losange-tokenizer-fn
:handler (fn [data] {:type :losange :data data})}]
reference-text))

;; All of the above plus the hashtag and internal-link tokenizers from `md.parser`.
(time-ms
(parse [md.parser/hashtag-tokenizer
md.parser/internal-link-tokenizer
{:regex #"\{\{([^\{]+)\}\}"
:handler (fn [m] {:type :var :text (m 1)})}
{:tokenizer-fn parsing-extensibility/losange-tokenizer-fn
:handler (fn [data] {:type :losange :data data})}]
reference-text))
;; Benchmark scratchpad: wrapped in `comment`, so none of these forms are evaluated
;; when the namespace loads; evaluate them one by one from the REPL.
(comment

;; Baseline timings on the same reference text: the local `parse` helper without
;; extra tokenizers versus `parser2/parse`.
(time-ms (parse reference-text))
(time-ms (parser2/parse reference-text))

;; Sanity check: number of top-level `:content` entries produced by the local `parse`.
(-> (parse reference-text)
:content count )

;; Same count for `parser2/parse`, to compare against the one above.
(-> (parser2/parse reference-text)
:content count )


;; One extra regex tokenizer: `{{name}}` is captured and emitted as a `:var` node
;; whose `:text` is the first capture group.
(time-ms (parse [{:regex #"\{\{([^\{]+)\}\}"
:handler (fn [m] {:type :var :text (m 1)})}]
reference-text))

;; The brace-brace tokenizer plus the function-based "losange" tokenizer taken from
;; the `parsing-extensibility` namespace.
(time-ms (parse [{:regex #"\{\{([^\{]+)\}\}"
:handler (fn [m] {:type :var :text (m 1)})}
{:tokenizer-fn parsing-extensibility/losange-tokenizer-fn
:handler (fn [data] {:type :losange :data data})}]
reference-text))

;; All of the above plus the hashtag and internal-link tokenizers from `md.parser`.
(time-ms
(parse [md.parser/hashtag-tokenizer
md.parser/internal-link-tokenizer
{:regex #"\{\{([^\{]+)\}\}"
:handler (fn [m] {:type :var :text (m 1)})}
{:tokenizer-fn parsing-extensibility/losange-tokenizer-fn
:handler (fn [data] {:type :losange :data data})}]
reference-text)))

^{::clerk/visibility {:code :hide :result :hide}}
(comment
Expand Down
32 changes: 5 additions & 27 deletions src/nextjournal/markdown/parser2.clj
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
[clojure.zip :as z]
[nextjournal.markdown.parser :as parser]
[nextjournal.markdown.parser2.types]
[nextjournal.markdown.parser2.footnotes :as footnotes])
[nextjournal.markdown.parser2.footnotes :as footnotes]
[nextjournal.markdown.parser2.formulas :as formulas])
(:import (org.commonmark.parser Parser Parser$ParserExtension Parser$Builder)
(org.commonmark.parser.delimiter DelimiterProcessor)
(org.commonmark.ext.task.list.items TaskListItemsExtension TaskListItemMarker)
Expand Down Expand Up @@ -49,29 +50,6 @@
;; - [ ] promote single images as blocks
;; - [ ] [[TOC]] (although not used in Clerk)

;; Parser extension that registers a custom delimiter processor for `$ ... $`
;; on the parser builder it is handed in `extend`.
;;
;; The processor uses `$` as both opening and closing character with a minimum
;; delimiter length of 1. `process` only acts when both delimiter runs have
;; length exactly 1; any other combination returns 0.
;; NOTE(review): the returned number is presumably the count of delimiter
;; characters consumed, per the DelimiterProcessor contract; confirm against
;; the commonmark-java version in use.
(def InlineFormulaExtension
(proxy [Object Parser$ParserExtension] []
(extend [^Parser$Builder pb]
(.customDelimiterProcessor
pb
(proxy [Object DelimiterProcessor] []
(getOpeningCharacter [] \$)
(getClosingCharacter [] \$)
(getMinLength [] 1)
(process [open close]
(if (and (= 1 (.length open))
(= 1 (.length close)))
;; Concatenate the literals of the Text nodes found between the opener and
;; the closer; nodes of any other type contribute nothing to the text.
(let [text (str/join
(keep #(when (instance? Text %) (.getLiteral %))
(Nodes/between (.. open getOpener) (.. close getCloser))))]
;; Detach every node between the delimiters; the single formula node
;; inserted below takes their place.
(doseq [^Node n (Nodes/between (.. open getOpener)
(.. close getCloser))]
(.unlink n))
(.. open getOpener
;; needs a named class `gen-class`
(insertAfter (new InlineFormula text)))
1)
0)))))))

(comment
(parse "* this is inline $\\phi$ math
Expand All @@ -80,9 +58,9 @@
(def ^Parser parser
(.. Parser
builder
(extensions [(TaskListItemsExtension/create)
InlineFormulaExtension
(footnotes/extension)])
(extensions [(formulas/extension)
(footnotes/extension)
(TaskListItemsExtension/create)])
build))

;; helpers / ctx
Expand Down
39 changes: 39 additions & 0 deletions src/nextjournal/markdown/parser2/formulas.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
(ns nextjournal.markdown.parser2.formulas
(:import (nextjournal.markdown.parser2.types InlineFormula)
(org.commonmark.node Node)
(org.commonmark.internal InlineParserImpl)
(org.commonmark.internal.inline InlineContentParser InlineParserState ParsedInline)
(org.commonmark.parser InlineParserFactory Parser Parser$ParserExtension Parser$Builder)))

(defn inline-formula-parser
  "Returns an `InlineContentParser` proxy that recognises dollar-delimited
  inline formulas such as `$\\phi$`.

  `tryParse` reads the run of `$` at the scanner position, scans forward to the
  next `$`, and reads the closing run. When both runs have the same length it
  returns a `ParsedInline` wrapping an `InlineFormula` built from the source
  text between the two runs, positioned after the closing run. In every other
  case it returns `ParsedInline/none`, never nil."
  []
  (proxy [InlineContentParser] []
    (tryParse [^InlineParserState parser-state]
      (let [scanner       (.scanner parser-state)
            dollars-open  (.matchMultiple scanner \$)
            after-opening (.position scanner)]
        ;; A result of 0 or less is rejected, so an empty body is not a formula.
        ;; NOTE(review): `find` presumably returns the number of characters
        ;; skipped and a negative value when `$` is absent; confirm against the
        ;; commonmark-java Scanner in use.
        (if (< 0 (.find scanner \$))
          (let [before-closing (.position scanner)
                dollars-close  (.matchMultiple scanner \$)]
            (if (= dollars-open dollars-close)
              (let [^String source (.getContent (.getSource scanner after-opening before-closing))]
                (ParsedInline/of (new InlineFormula source) (.position scanner)))
              ;; Opening and closing runs differ in length: report "no match"
              ;; explicitly rather than falling through to nil.
              (ParsedInline/none)))
          (ParsedInline/none))))))

(defn extension
  "Returns a `Parser$ParserExtension` proxy. Its `extend` method installs an
  `InlineParserFactory` on the given builder; each parser the factory creates
  is an `InlineParserImpl` with the inline formula parser registered for the
  `$` character."
  []
  (proxy [Object Parser$ParserExtension] []
    (extend [^Parser$Builder builder]
      (let [factory (proxy [InlineParserFactory] []
                      (create [context]
                        (let [inline-parser (InlineParserImpl. context)
                              formula-parsers (list (inline-formula-parser))]
                          (.addInlineParser inline-parser \$ formula-parsers))))]
        (.inlineParserFactory builder factory)))))

;; REPL example, not evaluated on load: parses a short document whose first
;; list item contains a dollar-delimited inline formula.
(comment

(nextjournal.markdown.parser2/parse "
# Ok
Aloha, that costs
* a $\\int_a^b\\phi(t)dt$ with discount
* and what"))

0 comments on commit adee38d

Please sign in to comment.