diff --git a/CHANGES.md b/CHANGES.md index 059c903ac4..9db0887ac1 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,8 +3,12 @@ ## 1.18.2 (Pending) ### Improvements -* The form associated elements returned by `FormElement.elements()` now reflect changes made to the DOM, + +* The form associated elements returned by `FormElement.elements()` now reflect changes made to the DOM, subsequently to the original parse. [2140](https://github.com/jhy/jsoup/issues/2140) +* In the `TreeBuilder`, the `onNodeInserted()` and `onNodeClosed()` events are now also fired for the outermost / + root `Document` node. This enables source position tracking on the Document node (which was previously unset), and + it also enables the node traversor to see the outer Document node. [2182](https://github.com/jhy/jsoup/pull/2182) ### Bug Fixes diff --git a/src/main/java/org/jsoup/parser/TreeBuilder.java b/src/main/java/org/jsoup/parser/TreeBuilder.java index fb5c0708fa..89ba82701b 100644 --- a/src/main/java/org/jsoup/parser/TreeBuilder.java +++ b/src/main/java/org/jsoup/parser/TreeBuilder.java @@ -58,6 +58,7 @@ void initialiseParse(Reader input, String baseUri, Parser parser) { start = new Token.StartTag(this); currentToken = start; // init current token to the virtual start token. 
this.baseUri = baseUri; + onNodeInserted(doc); } void completeParse() { @@ -108,7 +109,13 @@ void runParser() { boolean stepParser() { // if we have reached the end already, step by popping off the stack, to hit nodeRemoved callbacks: if (currentToken.type == Token.TokenType.EOF) { - if (stack == null || stack.isEmpty()) return false; // stack will be null if TB was closed, as in case of runParser() + completeFragment() + if (stack == null) { + return false; + } if (stack.isEmpty()) { + onNodeClosed(doc); // the root doc is not on the stack, so let this final step close it + stack = null; + return true; + } pop(); return true; } diff --git a/src/test/java/org/jsoup/parser/PositionTest.java b/src/test/java/org/jsoup/parser/PositionTest.java index e809809583..9dd42971a7 100644 --- a/src/test/java/org/jsoup/parser/PositionTest.java +++ b/src/test/java/org/jsoup/parser/PositionTest.java @@ -519,6 +519,22 @@ private void printRange(Node node) { assertEquals("class=\"On\"", attr.html()); } + @Test void tracksDocument() { + String html = "<!doctype html><title>Foo</title><p>Bar."; + Document doc = Jsoup.parse(html, TrackingHtmlParser); + StringBuilder track = new StringBuilder(); + doc.forEachNode(node -> accumulatePositions(node, track)); + assertEquals("#document:0-0~40-40; #doctype:0-15; html:15-15~40-40; head:15-15~33-33; title:15-22~15-33; #text:22-25; body:33-33~40-40; p:33-36~40-40; #text:36-40; ", track.toString()); + } + + @Test void tracksDocumentXml() { + String html = "<!doctype html><title>Foo</title><p>Bar."; + Document doc = Jsoup.parse(html, TrackingXmlParser); + StringBuilder track = new StringBuilder(); + doc.forEachNode(node -> accumulatePositions(node, track)); + assertEquals("#document:0-0~40-40; #doctype:0-15; title:15-22~25-33; #text:22-25; p:33-36~40-40; #text:36-40; ", track.toString()); + } + @Test void updateKeyMaintainsRangeUc() { String html = "
One
"; Document doc = Jsoup.parse(html, TrackingXmlParser); diff --git a/src/test/java/org/jsoup/parser/StreamParserTest.java b/src/test/java/org/jsoup/parser/StreamParserTest.java index bbbd620c59..cebab0c424 100644 --- a/src/test/java/org/jsoup/parser/StreamParserTest.java +++ b/src/test/java/org/jsoup/parser/StreamParserTest.java @@ -36,7 +36,7 @@ void canStream() { StringBuilder seen; seen = new StringBuilder(); parser.stream().forEachOrdered(el -> trackSeen(el, seen)); - assertEquals("title[Test];head+;div#1[D1]+;span[P One];p#3+;p#4[P Two];div#2[D2]+;p#6[P three];div#5[D3];body;html;", seen.toString()); + assertEquals("title[Test];head+;div#1[D1]+;span[P One];p#3+;p#4[P Two];div#2[D2]+;p#6[P three];div#5[D3];body;html;#root;", seen.toString()); // checks expected order, and the + indicates that element had a next sibling at time of emission } } @@ -48,7 +48,7 @@ void canStreamXml() { StringBuilder seen; seen = new StringBuilder(); parser.stream().forEachOrdered(el -> trackSeen(el, seen)); - assertEquals("DIV#1[D1]+;span[P One];p#3+;p#4[P Two];div#2[D2]+;p#6[P three];div#5[D3];outmost;", seen.toString()); + assertEquals("DIV#1[D1]+;span[P One];p#3+;p#4[P Two];div#2[D2]+;p#6[P three];div#5[D3];outmost;#root;", seen.toString()); // checks expected order, and the + indicates that element had a next sibling at time of emission } } @@ -64,7 +64,7 @@ void canStreamXml() { trackSeen(it.next(), seen); } - assertEquals("title[Test];head+;div#1[D1]+;span[P One];p#3+;p#4[P Two];div#2[D2]+;p#6[P three];div#5[D3];body;html;", seen.toString()); + assertEquals("title[Test];head+;div#1[D1]+;span[P One];p#3+;p#4[P Two];div#2[D2]+;p#6[P three];div#5[D3];body;html;#root;", seen.toString()); // checks expected order, and the + indicates that element had a next sibling at time of emission } @@ -75,13 +75,13 @@ void canStreamXml() { StringBuilder seen = new StringBuilder(); parser.stream().forEach(el -> trackSeen(el, seen)); - assertEquals("head+;p[One]+;p[Two];body;html;", 
seen.toString()); + assertEquals("head+;p[One]+;p[Two];body;html;#root;", seen.toString()); String html2 = "