diff --git a/CHANGES.md b/CHANGES.md index 4c2eed2922..a650af40b9 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -5,8 +5,14 @@ ### Changes * Updated the minimum Android API Level validation from 10 to **21**. As with previous jsoup versions, Android - developers need to enable core library desugaring. The minimum Java version remains Java - 8. [2173](https://github.com/jhy/jsoup/pull/2173) + developers need to enable core library desugaring. The minimum Java version remains Java 8. + [2173](https://github.com/jhy/jsoup/pull/2173) + +### Improvements + +* Added `Element#selectStream(String query)` and `Element#selectStream(Evaluator evaluator)` methods, which return a `Stream` of + matching elements. Elements are evaluated and returned as they are found, and the stream can be + terminated early. [2092](https://github.com/jhy/jsoup/pull/2092) ### Bug Fixes diff --git a/src/main/java/org/jsoup/nodes/Element.java b/src/main/java/org/jsoup/nodes/Element.java index aff327b115..1a1538edd6 100644 --- a/src/main/java/org/jsoup/nodes/Element.java +++ b/src/main/java/org/jsoup/nodes/Element.java @@ -486,6 +486,39 @@ public Elements select(Evaluator evaluator) { return Selector.select(evaluator, this); } + /** + Selects elements that match the specified {@link Selector} CSS query, with this element as the + starting context (root), and returns them as a lazy Stream. Matched elements may include this element, or any of its + children. +

+ Unlike {@link #select(String query)}, which returns a complete list of all matching elements, this method returns a + {@link Stream} that processes elements lazily as they are needed. The stream operates in a "pull" model — elements + are fetched from the root as the stream is traversed. You can use standard {@code Stream} operations such as + {@code filter}, {@code map}, or {@code findFirst} to process elements on demand. +

+ + @param cssQuery a {@link Selector} CSS-like query + @return a {@link Stream} containing elements that match the query (empty if none match) + @throws Selector.SelectorParseException (unchecked) on an invalid CSS query. + @see Selector selector query syntax + @see QueryParser#parse(String) + @since 1.19.1 + */ + public Stream selectStream(String cssQuery) { + return Selector.selectStream(cssQuery, this); + } + + /** + Find a Stream of elements that match the supplied Evaluator. + + @param evaluator an element Evaluator + @return a {@link Stream} containing elements that match the query (empty if none match) + @since 1.19.1 + */ + public Stream selectStream(Evaluator evaluator) { + return Selector.selectStream(evaluator, this); + } + /** * Find the first Element that matches the {@link Selector} CSS query, with this element as the starting context. *

This is effectively the same as calling {@code element.select(query).first()}, but is more efficient as query @@ -1125,12 +1158,7 @@ public Elements getElementsByTag(String tagName) { */ public @Nullable Element getElementById(String id) { Validate.notEmpty(id); - - Elements elements = Collector.collect(new Evaluator.Id(id), this); - if (elements.size() > 0) - return elements.get(0); - else - return null; + return Collector.findFirst(new Evaluator.Id(id), this); } /** diff --git a/src/main/java/org/jsoup/select/Collector.java b/src/main/java/org/jsoup/select/Collector.java index 02b0528384..4199401571 100644 --- a/src/main/java/org/jsoup/select/Collector.java +++ b/src/main/java/org/jsoup/select/Collector.java @@ -3,8 +3,8 @@ import org.jsoup.nodes.Element; import org.jspecify.annotations.Nullable; -import java.util.Optional; import java.util.stream.Collectors; +import java.util.stream.Stream; /** * Collects a list of elements that match the supplied criteria. @@ -16,17 +16,26 @@ public class Collector { private Collector() {} /** - Build a list of elements, by visiting root and every descendant of root, and testing it against the evaluator. + Build a list of elements, by visiting the root and every descendant of root, and testing it against the Evaluator. @param eval Evaluator to test elements against @param root root of tree to descend @return list of matches; empty if none */ - public static Elements collect (Evaluator eval, Element root) { - eval.reset(); + public static Elements collect(Evaluator eval, Element root) { + return stream(eval, root).collect(Collectors.toCollection(Elements::new)); + } + + /** + Obtain a Stream of elements by visiting the root and every descendant of root and testing it against the evaluator. 
- return root.stream() - .filter(eval.asPredicate(root)) - .collect(Collectors.toCollection(Elements::new)); + @param evaluator Evaluator to test elements against + @param root root of tree to descend + @return A {@link Stream} of matches + @since 1.19.1 + */ + public static Stream stream(Evaluator evaluator, Element root) { + evaluator.reset(); + return root.stream().filter(evaluator.asPredicate(root)); } /** @@ -37,9 +46,6 @@ public static Elements collect (Evaluator eval, Element root) { @return the first match; {@code null} if none */ public static @Nullable Element findFirst(Evaluator eval, Element root) { - eval.reset(); - - Optional first = root.stream().filter(eval.asPredicate(root)).findFirst(); - return first.orElse(null); + return stream(eval, root).findFirst().orElse(null); } } diff --git a/src/main/java/org/jsoup/select/Selector.java b/src/main/java/org/jsoup/select/Selector.java index e1d09f5b51..c0c574fbc4 100644 --- a/src/main/java/org/jsoup/select/Selector.java +++ b/src/main/java/org/jsoup/select/Selector.java @@ -6,6 +6,7 @@ import java.util.Collection; import java.util.IdentityHashMap; +import java.util.stream.Stream; /** * CSS-like element selector, that finds elements matching a query. @@ -90,12 +91,12 @@ public class Selector { private Selector() {} /** - * Find elements matching selector. - * - * @param query CSS selector - * @param root root element to descend into - * @return matching elements, empty if none - * @throws Selector.SelectorParseException (unchecked) on an invalid CSS query. + Find Elements matching the CSS query. + + @param query CSS selector + @param root root element to descend into + @return matching elements, empty if none + @throws Selector.SelectorParseException (unchecked) on an invalid CSS query. */ public static Elements select(String query, Element root) { Validate.notEmpty(query); @@ -103,11 +104,11 @@ public static Elements select(String query, Element root) { } /** - * Find elements matching selector. 
- * - * @param evaluator CSS selector - * @param root root element to descend into - * @return matching elements, empty if none + Find Elements matching the Evaluator. + + @param evaluator CSS Evaluator + @param root root (context) element to start from + @return matching elements, empty if none */ public static Elements select(Evaluator evaluator, Element root) { Validate.notNull(evaluator); @@ -116,11 +117,39 @@ public static Elements select(Evaluator evaluator, Element root) { } /** - * Find elements matching selector. - * - * @param query CSS selector - * @param roots root elements to descend into - * @return matching elements, empty if none + Finds a Stream of elements matching the CSS query. + + @param query CSS selector + @param root root element to descend into + @return a Stream of matching elements, empty if none + @throws Selector.SelectorParseException (unchecked) on an invalid CSS query. + @since 1.19.1 + */ + public static Stream selectStream(String query, Element root) { + Validate.notEmpty(query); + return selectStream(QueryParser.parse(query), root); + } + + /** + Finds a Stream of elements matching the evaluator. + + @param evaluator CSS Evaluator + @param root root element to descend into + @return a Stream of matching elements, empty if none + @since 1.19.1 + */ + public static Stream selectStream(Evaluator evaluator, Element root) { + Validate.notNull(evaluator); + Validate.notNull(root); + return Collector.stream(evaluator, root); + } + + /** + Find elements matching the query. + + @param query CSS selector + @param roots root elements to descend into + @return matching elements, empty if none */ public static Elements select(String query, Iterable roots) { Validate.notEmpty(query); @@ -159,10 +188,11 @@ static Elements filterOut(Collection elements, Collection outs } /** - * Find the first element that matches the query. - * @param cssQuery CSS selector - * @param root root element to descend into - * @return the matching element, or null if none. 
+ Find the first Element that matches the query. + + @param cssQuery CSS selector + @param root root element to descend into + @return the matching element, or null if none. */ public static @Nullable Element selectFirst(String cssQuery, Element root) { Validate.notEmpty(cssQuery); diff --git a/src/test/java/org/jsoup/nodes/ElementTest.java b/src/test/java/org/jsoup/nodes/ElementTest.java index 783cdb4e3e..2c2fd50d9b 100644 --- a/src/test/java/org/jsoup/nodes/ElementTest.java +++ b/src/test/java/org/jsoup/nodes/ElementTest.java @@ -2981,4 +2981,15 @@ void prettySerializationRoundTrips(Document.OutputSettings settings) { assertEquals("

One

", p.outerHtml()); assertEquals("CLASS=\"YES\"", attr.html()); } + + @Test void testSelectStream() { + Document doc = Jsoup.parse("
Hello world
"); + Element div = doc.select("div").stream().findFirst().orElse(null); + + assertEquals("Hello world", div.text()); + + div = doc.selectStream("div").findFirst().orElse(null); + + assertEquals("Hello world", div.text()); + } } diff --git a/src/test/java/org/jsoup/select/SelectorTest.java b/src/test/java/org/jsoup/select/SelectorTest.java index 0ae4048e3f..78d5bdb6be 100644 --- a/src/test/java/org/jsoup/select/SelectorTest.java +++ b/src/test/java/org/jsoup/select/SelectorTest.java @@ -8,7 +8,9 @@ import org.junit.jupiter.api.Test; import java.util.IdentityHashMap; +import java.util.List; import java.util.Locale; +import java.util.stream.Collectors; import static org.junit.jupiter.api.Assertions.*; @@ -402,6 +404,19 @@ public void testByAttributeStarting(Locale locale) { assertEquals("span", divChilds.get(2).tagName()); } + @Test public void streamParentChildStar() { + String h = "

Hello

there

Hi
"; + Document doc = Jsoup.parse(h); + + List divChilds = doc.selectStream("div > *") + .collect(Collectors.toList()); + + assertEquals(3, divChilds.size()); + assertEquals("p", divChilds.get(0).tagName()); + assertEquals("p", divChilds.get(1).tagName()); + assertEquals("span", divChilds.get(2).tagName()); + } + @Test public void multiChildDescent() { String h = ""; Document doc = Jsoup.parse(h);