Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add select methods returning element streams #2092

Merged
merged 11 commits into from
Dec 14, 2024
41 changes: 36 additions & 5 deletions src/main/java/org/jsoup/nodes/Element.java
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,41 @@ public Elements select(Evaluator evaluator) {
return Selector.select(evaluator, this);
}

/**
* Find elements that match the {@link Selector} CSS query, with this element as the starting context. Matched elements
* may include this element, or any of its children.
* <p>This method is generally more powerful to use than the DOM-type {@code getElementBy*} methods, because
* multiple filters can be combined, e.g.:</p>
* <ul>
* <li>{@code el.select("a[href]")} - finds links ({@code a} tags with {@code href} attributes)
Isira-Seneviratne marked this conversation as resolved.
Show resolved Hide resolved
* <li>{@code el.select("a[href*=example.com]")} - finds links pointing to example.com (loosely)
* </ul>
* <p>See the query syntax documentation in {@link org.jsoup.select.Selector}.</p>
* <p>Also known as {@code querySelectorAll()} in the Web DOM.</p>
*
* @param cssQuery a {@link Selector} CSS-like query
* @return a {@link Stream} containing elements that match the query (empty if none match)
* @see Selector selector query syntax
* @see QueryParser#parse(String)
* @throws Selector.SelectorParseException (unchecked) on an invalid CSS query.
* @since 1.17.2
Isira-Seneviratne marked this conversation as resolved.
Show resolved Hide resolved
*/
public Stream<Element> selectAsStream(String cssQuery) {
Isira-Seneviratne marked this conversation as resolved.
Show resolved Hide resolved
return Selector.selectAsStream(cssQuery, this);
}

/**
 * Find elements that match the supplied Evaluator, returned as a {@link Stream}. This has the same functionality
 * as {@link #selectAsStream(String)}, but may be useful if you are running the same query many times (on many
 * documents) and want to save the overhead of repeatedly parsing the CSS query.
 *
 * @param evaluator an element evaluator
 * @return a {@link Stream} containing elements that match the query (empty if none match)
 * @since 1.17.2
 */
public Stream<Element> selectAsStream(Evaluator evaluator) {
    // This element is passed to Selector as the root of the search.
    final Stream<Element> matches = Selector.selectAsStream(evaluator, this);
    return matches;
}

/**
* Find the first Element that matches the {@link Selector} CSS query, with this element as the starting context.
* <p>This is effectively the same as calling {@code element.select(query).first()}, but is more efficient as query
Expand Down Expand Up @@ -1128,11 +1163,7 @@ public Elements getElementsByTag(String tagName) {
public @Nullable Element getElementById(String id) {
    // Validate the id before building the evaluator.
    Validate.notEmpty(id);

    // Stream the elements matching the Id evaluator and keep only the first one;
    // an empty stream maps to null, matching the @Nullable return type.
    return selectAsStream(new Evaluator.Id(id)).findFirst().orElse(null);
}

/**
Expand Down
25 changes: 16 additions & 9 deletions src/main/java/org/jsoup/select/Collector.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
import org.jsoup.nodes.Element;
import org.jspecify.annotations.Nullable;

import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
* Collects a list of elements that match the supplied criteria.
Expand All @@ -21,12 +21,22 @@ private Collector() {}
@param root root of tree to descend
@return list of matches; empty if none
*/
public static Elements collect (Evaluator eval, Element root) {
eval.reset();
public static Elements collect(Evaluator eval, Element root) {
return stream(eval, root).collect(Collectors.toCollection(Elements::new));
}

/**
* Obtain a stream of elements by visiting root and every descendant of root and testing it
* against the evaluator.
* @param evaluator Evaluator to test elements against
* @param root root of tree to descend
* @return A {@link Stream} of matches
*/
public static Stream<Element> stream(Evaluator evaluator, Element root) {
evaluator.reset();

return root.stream()
.filter(eval.asPredicate(root))
.collect(Collectors.toCollection(Elements::new));
.filter(evaluator.asPredicate(root));
}

/**
Expand All @@ -37,9 +47,6 @@ public static Elements collect (Evaluator eval, Element root) {
@return the first match; {@code null} if none
*/
public static @Nullable Element findFirst(Evaluator eval, Element root) {
    // stream() resets the evaluator and applies the filter; take the first match,
    // or null when the stream is empty.
    return stream(eval, root).findFirst().orElse(null);
}
}
27 changes: 27 additions & 0 deletions src/main/java/org/jsoup/select/Selector.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import java.util.Collection;
import java.util.IdentityHashMap;
import java.util.stream.Stream;

/**
* CSS-like element selector, that finds elements matching a query.
Expand Down Expand Up @@ -114,6 +115,32 @@ public static Elements select(Evaluator evaluator, Element root) {
return Collector.collect(evaluator, root);
}

/**
 * Find elements matching a CSS selector query, returned as a {@link Stream}.
 *
 * @param query CSS selector; checked with {@code Validate.notEmpty} before parsing
 * @param root root element to descend into
 * @return a {@link Stream} of matching elements, empty if none
 * @throws Selector.SelectorParseException (unchecked) on an invalid CSS query.
 */
public static Stream<Element> selectAsStream(String query, Element root) {
    Validate.notEmpty(query);
    // Parse once, then hand off to the Evaluator overload, which validates its arguments.
    final Evaluator evaluator = QueryParser.parse(query);
    return selectAsStream(evaluator, root);
}

/**
 * Find elements matching the supplied evaluator, returned as a {@link Stream}.
 *
 * @param evaluator element evaluator to test elements against; must not be null
 * @param root root element to descend into; must not be null
 * @return a {@link Stream} of matching elements, empty if none
 */
public static Stream<Element> selectAsStream(Evaluator evaluator, Element root) {
    Validate.notNull(evaluator);
    Validate.notNull(root);
    // Collector builds the filtered stream over root.
    final Stream<Element> matches = Collector.stream(evaluator, root);
    return matches;
}

/**
* Find elements matching selector.
*
Expand Down