Skip to content

Commit

Permalink
Add select methods returning element streams (#2092)
Browse files Browse the repository at this point in the history
---------

Co-authored-by: Jonathan Hedley <[email protected]>
  • Loading branch information
Isira-Seneviratne and jhy authored Dec 14, 2024
1 parent 3c8421c commit d5bbe25
Show file tree
Hide file tree
Showing 6 changed files with 135 additions and 39 deletions.
10 changes: 8 additions & 2 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,14 @@
### Changes

* Updated the minimum Android API Level validation from 10 to **21**. As with previous jsoup versions, Android
developers need to enable core library desugaring. The minimum Java version remains Java
8. [2173](https://github.com/jhy/jsoup/pull/2173)
developers need to enable core library desugaring. The minimum Java version remains Java 8.
[2173](https://github.com/jhy/jsoup/pull/2173)

### Improvements

* Added `Element#selectStream(String query)` and `Element#selectStream(Evaluator evaluator)` methods, which return a
`Stream` of matching elements. Elements are evaluated and returned as they are found, and the stream can be
terminated early. [2092](https://github.com/jhy/jsoup/pull/2092)

### Bug Fixes

Expand Down
40 changes: 34 additions & 6 deletions src/main/java/org/jsoup/nodes/Element.java
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,39 @@ public Elements select(Evaluator evaluator) {
return Selector.select(evaluator, this);
}

/**
Lazily finds elements that match the supplied {@link Selector} CSS query, using this element as the starting
context. The matches may include this element itself, or any of its descendants.
<p>
Where {@link #select(String query)} builds the complete list of matching elements before returning, this method
hands back a {@link Stream} that evaluates elements on demand as the stream is consumed, so the traversal can be
terminated early. Standard {@code Stream} operations such as {@code filter}, {@code map}, or {@code findFirst} may
be chained onto the result.
</p>
@param cssQuery a {@link Selector} CSS-like query
@return a {@link Stream} of the elements that match the query (empty if none match)
@throws Selector.SelectorParseException (unchecked) on an invalid CSS query.
@see Selector selector query syntax
@see QueryParser#parse(String)
@since 1.19.1
*/
public Stream<Element> selectStream(String cssQuery) {
    // this element is the context (root) the query is evaluated from
    final Element context = this;
    return Selector.selectStream(cssQuery, context);
}

/**
Obtain a Stream of the elements that match the supplied Evaluator, with this element as the starting context.
@param evaluator an element Evaluator
@return a {@link Stream} of the elements that match the Evaluator (empty if none match)
@since 1.19.1
*/
public Stream<Element> selectStream(Evaluator evaluator) {
    Stream<Element> matches = Selector.selectStream(evaluator, this);
    return matches;
}

/**
* Find the first Element that matches the {@link Selector} CSS query, with this element as the starting context.
* <p>This is effectively the same as calling {@code element.select(query).first()}, but is more efficient as query
Expand Down Expand Up @@ -1125,12 +1158,7 @@ public Elements getElementsByTag(String tagName) {
*/
public @Nullable Element getElementById(String id) {
    // an empty id can never match; reject it up front
    Validate.notEmpty(id);

    // Ask the Collector for the first match only, instead of collecting every element that carries this id and
    // then taking index 0. Yields null when no element matches.
    return Collector.findFirst(new Evaluator.Id(id), this);
}

/**
Expand Down
28 changes: 17 additions & 11 deletions src/main/java/org/jsoup/select/Collector.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
import org.jsoup.nodes.Element;
import org.jspecify.annotations.Nullable;

import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
* Collects a list of elements that match the supplied criteria.
Expand All @@ -16,17 +16,26 @@ public class Collector {
private Collector() {}

/**
Build a list of elements, by visiting root and every descendant of root, and testing it against the evaluator.
Build a list of elements, by visiting the root and every descendant of root, and testing it against the Evaluator.
@param eval Evaluator to test elements against
@param root root of tree to descend
@return list of matches; empty if none
*/
public static Elements collect (Evaluator eval, Element root) {
eval.reset();
public static Elements collect(Evaluator eval, Element root) {
return stream(eval, root).collect(Collectors.toCollection(Elements::new));
}

/**
Obtain a Stream of elements by visiting the root and every descendant of root, and testing each against the
Evaluator. The Evaluator is reset before the Stream is created.
@param evaluator Evaluator to test elements against
@param root root of tree to descend
@return a {@link Stream} of the matching elements
@since 1.19.1
*/
public static Stream<Element> stream(Evaluator evaluator, Element root) {
    evaluator.reset();
    Stream<Element> candidates = root.stream();
    return candidates.filter(evaluator.asPredicate(root));
}

/**
Expand All @@ -37,9 +46,6 @@ public static Elements collect (Evaluator eval, Element root) {
@return the first match; {@code null} if none
*/
public static @Nullable Element findFirst(Evaluator eval, Element root) {
    // stream() resets the evaluator and filters the tree; findFirst() is short-circuiting, so the search ends at
    // the first match. An empty result is mapped to null.
    return stream(eval, root).findFirst().orElse(null);
}
}
70 changes: 50 additions & 20 deletions src/main/java/org/jsoup/select/Selector.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import java.util.Collection;
import java.util.IdentityHashMap;
import java.util.stream.Stream;

/**
* CSS-like element selector, that finds elements matching a query.
Expand Down Expand Up @@ -90,24 +91,24 @@ public class Selector {
private Selector() {}

/**
Parse the CSS query and find the Elements that match it, starting from the root.
@param query CSS selector
@param root root element to descend into
@return matching elements, empty if none
@throws Selector.SelectorParseException (unchecked) on an invalid CSS query.
*/
public static Elements select(String query, Element root) {
    Validate.notEmpty(query);
    Evaluator evaluator = QueryParser.parse(query);
    return select(evaluator, root);
}

/**
* Find elements matching selector.
*
* @param evaluator CSS selector
* @param root root element to descend into
* @return matching elements, empty if none
Find Elements matching the Evaluator.
@param evaluator CSS Evaluator
@param root root (context) element to start from
@return matching elements, empty if none
*/
public static Elements select(Evaluator evaluator, Element root) {
Validate.notNull(evaluator);
Expand All @@ -116,11 +117,39 @@ public static Elements select(Evaluator evaluator, Element root) {
}

/**
Parse the CSS query and obtain a Stream of the elements that match it, starting from the root.
@param query CSS selector
@param root root element to descend into
@return a Stream of matching elements, empty if none
@throws Selector.SelectorParseException (unchecked) on an invalid CSS query.
@since 1.19.1
*/
public static Stream<Element> selectStream(String query, Element root) {
    Validate.notEmpty(query);
    Evaluator evaluator = QueryParser.parse(query);
    return selectStream(evaluator, root);
}

/**
Obtain a Stream of the elements that match the Evaluator, starting from the root.
@param evaluator Evaluator to test elements against; must not be null
@param root root (context) element to start from; must not be null
@return a Stream of matching elements, empty if none
@since 1.19.1
*/
public static Stream<Element> selectStream(Evaluator evaluator, Element root) {
    Validate.notNull(evaluator);
    Validate.notNull(root);
    Stream<Element> matches = Collector.stream(evaluator, root);
    return matches;
}

/**
Find elements matching the query.
@param query CSS selector
@param roots root elements to descend into
@return matching elements, empty if none
*/
public static Elements select(String query, Iterable<Element> roots) {
Validate.notEmpty(query);
Expand Down Expand Up @@ -159,10 +188,11 @@ static Elements filterOut(Collection<Element> elements, Collection<Element> outs
}

/**
* Find the first element that matches the query.
* @param cssQuery CSS selector
* @param root root element to descend into
* @return the matching element, or <b>null</b> if none.
Find the first Element that matches the query.
@param cssQuery CSS selector
@param root root element to descend into
@return the matching element, or <b>null</b> if none.
*/
public static @Nullable Element selectFirst(String cssQuery, Element root) {
Validate.notEmpty(cssQuery);
Expand Down
11 changes: 11 additions & 0 deletions src/test/java/org/jsoup/nodes/ElementTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2981,4 +2981,15 @@ void prettySerializationRoundTrips(Document.OutputSettings settings) {
assertEquals("<p CLASS=\"YES\">One</p>", p.outerHtml());
assertEquals("CLASS=\"YES\"", attr.html());
}

@Test void testSelectStream() {
    final String expectedText = "Hello world";
    Document doc = Jsoup.parse("<div>Hello world</div>");

    // baseline: the eager select(), consumed through the list's own stream
    Element viaSelect = doc.select("div").stream().findFirst().orElse(null);
    assertEquals(expectedText, viaSelect.text());

    // selectStream() must find the same element
    Element viaSelectStream = doc.selectStream("div").findFirst().orElse(null);
    assertEquals(expectedText, viaSelectStream.text());
}
}
15 changes: 15 additions & 0 deletions src/test/java/org/jsoup/select/SelectorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
import org.junit.jupiter.api.Test;

import java.util.IdentityHashMap;
import java.util.List;
import java.util.Locale;
import java.util.stream.Collectors;

import static org.junit.jupiter.api.Assertions.*;

Expand Down Expand Up @@ -402,6 +404,19 @@ public void testByAttributeStarting(Locale locale) {
assertEquals("span", divChilds.get(2).tagName());
}

@Test public void streamParentChildStar() {
    Document doc = Jsoup.parse("<div id=1><p>Hello<p><b>there</b></p></div><div id=2><span>Hi</span></div>");
    List<Element> divChilds = doc.selectStream("div > *").collect(Collectors.toList());

    // direct children of either div, in document order
    String[] expectedTags = {"p", "p", "span"};
    assertEquals(3, divChilds.size());
    for (int i = 0; i < expectedTags.length; i++) {
        assertEquals(expectedTags[i], divChilds.get(i).tagName());
    }
}

@Test public void multiChildDescent() {
String h = "<div id=foo><h1 class=bar><a href=http://example.com/>One</a></h1></div>";
Document doc = Jsoup.parse(h);
Expand Down

0 comments on commit d5bbe25

Please sign in to comment.