diff --git a/src/main/java/org/jsoup/Jsoup.java b/src/main/java/org/jsoup/Jsoup.java
index 29acbafbc4..e20311bcd8 100644
--- a/src/main/java/org/jsoup/Jsoup.java
+++ b/src/main/java/org/jsoup/Jsoup.java
@@ -13,6 +13,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
+import java.nio.file.Path;
/**
The core public access point to the jsoup functionality.
@@ -183,6 +184,72 @@ public static Document parse(File file, @Nullable String charsetName, String bas
return DataUtil.load(file, charsetName, baseUri, parser);
}
+    /**
+     Read a file from disk and parse its contents as HTML, using the standard HTML parser.
+
+     @param path file to load HTML from. Supports gzipped files (ending in .z or .gz).
+     @param charsetName (optional) character set of file contents. Set to {@code null} to determine from {@code http-equiv} meta tag, if
+     present, or fall back to {@code UTF-8} (which is often safe to do).
+     @param baseUri The URL where the HTML was retrieved from, to resolve relative links against.
+     @return sane HTML
+
+     @throws IOException if the file could not be found, or read, or if the charsetName is invalid.
+     @since 1.18.1
+     */
+    public static Document parse(Path path, @Nullable String charsetName, String baseUri) throws IOException {
+        // same work as the three-argument DataUtil.load(Path, ...), with the HTML parser spelled out
+        final Parser htmlParser = Parser.htmlParser();
+        return DataUtil.load(path, charsetName, baseUri, htmlParser);
+    }
+
+    /**
+     Parse the contents of a file as HTML. The location of the file is used as the base URI to qualify relative URLs.
+
+     @param path file to load HTML from. Supports gzipped files (ending in .z or .gz).
+     @param charsetName (optional) character set of file contents. Set to {@code null} to determine from {@code http-equiv} meta tag, if
+     present, or fall back to {@code UTF-8} (which is often safe to do).
+     @return sane HTML
+
+     @throws IOException if the file could not be found, or read, or if the charsetName is invalid.
+     @see #parse(Path, String, String) parse(path, charset, baseUri)
+     @since 1.18.1
+     */
+    public static Document parse(Path path, @Nullable String charsetName) throws IOException {
+        // the absolute location of the file doubles as the base URI
+        return DataUtil.load(path, charsetName, path.toAbsolutePath().toString());
+    }
+
+    /**
+     Parse the contents of a file as HTML. The location of the file is used as the base URI to qualify relative URLs.
+     The charset used to read the file will be determined by the byte-order-mark (BOM), or a {@code <meta charset>} tag,
+     or if neither is present, will be {@code UTF-8}.
+
+     <p>This is the equivalent of calling {@link #parse(Path, String) parse(path, null)}</p>
+
+     @param path the file to load HTML from. Supports gzipped files (ending in .z or .gz).
+     @return sane HTML
+     @throws IOException if the file could not be found or read.
+     @see #parse(Path, String, String) parse(path, charset, baseUri)
+     @since 1.18.1
+     */
+    public static Document parse(Path path) throws IOException {
+        // null charset: let the loader detect it; the absolute location of the file doubles as the base URI
+        return DataUtil.load(path, null, path.toAbsolutePath().toString());
+    }
+
+    /**
+     Read a file from disk and parse its contents with the supplied parser.
+
+     @param path file to load HTML from. Supports gzipped files (ending in .z or .gz).
+     @param charsetName (optional) character set of file contents. Set to {@code null} to determine from {@code http-equiv} meta tag, if
+     present, or fall back to {@code UTF-8} (which is often safe to do).
+     @param baseUri The URL where the HTML was retrieved from, to resolve relative links against.
+     @param parser alternate {@link Parser#xmlParser() parser} to use.
+     @return sane HTML
+
+     @throws IOException if the file could not be found, or read, or if the charsetName is invalid.
+     @since 1.18.1
+     */
+    public static Document parse(Path path, @Nullable String charsetName, String baseUri, Parser parser) throws IOException {
+        final Document document = DataUtil.load(path, charsetName, baseUri, parser);
+        return document;
+    }
+
/**
Read an input stream, and parse it to a Document.
diff --git a/src/main/java/org/jsoup/UncheckedIOException.java b/src/main/java/org/jsoup/UncheckedIOException.java
index dd6a76b5a6..a3b4fa31b6 100644
--- a/src/main/java/org/jsoup/UncheckedIOException.java
+++ b/src/main/java/org/jsoup/UncheckedIOException.java
@@ -6,7 +6,7 @@
* @deprecated Use {@link java.io.UncheckedIOException} instead. This class acted as a compatibility shim for Java
* versions prior to 1.8.
*/
-// todo annotate @Deprecated in next release (after previous @Deprecations clear)
+@Deprecated
public class UncheckedIOException extends java.io.UncheckedIOException {
public UncheckedIOException(IOException cause) {
super(cause);
diff --git a/src/main/java/org/jsoup/helper/DataUtil.java b/src/main/java/org/jsoup/helper/DataUtil.java
index c1c791053c..58f44fb7c0 100644
--- a/src/main/java/org/jsoup/helper/DataUtil.java
+++ b/src/main/java/org/jsoup/helper/DataUtil.java
@@ -2,7 +2,6 @@
import org.jsoup.internal.ControllableInputStream;
import org.jsoup.internal.Normalizer;
-import org.jsoup.internal.SharedConstants;
import org.jsoup.internal.StringUtil;
import org.jsoup.nodes.Comment;
import org.jsoup.nodes.Document;
@@ -16,7 +15,6 @@
import java.io.BufferedReader;
import java.io.CharArrayReader;
import java.io.File;
-import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
@@ -25,8 +23,12 @@
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
+import java.nio.channels.Channels;
+import java.nio.channels.SeekableByteChannel;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
+import java.nio.file.Files;
+import java.nio.file.Path;
import java.util.Locale;
import java.util.Random;
import java.util.regex.Matcher;
@@ -63,7 +65,7 @@ private DataUtil() {}
* @throws IOException on IO error
*/
public static Document load(File file, @Nullable String charsetName, String baseUri) throws IOException {
- return load(file, charsetName, baseUri, Parser.htmlParser());
+ // File callers are routed through the Path-based overload of the same arity
+ return load(file.toPath(), charsetName, baseUri);
}
/**
@@ -81,18 +83,48 @@ public static Document load(File file, @Nullable String charsetName, String base
* @since 1.14.2
*/
public static Document load(File file, @Nullable String charsetName, String baseUri, Parser parser) throws IOException {
- InputStream stream = new FileInputStream(file);
- String name = Normalizer.lowerCase(file.getName());
- if (name.endsWith(".gz") || name.endsWith(".z")) {
- // unfortunately file input streams don't support marks (why not?), so we will close and reopen after read
- boolean zipped;
- try {
- zipped = (stream.read() == 0x1f && stream.read() == 0x8b); // gzip magic bytes
- } finally {
- stream.close();
+ // File callers are routed through the Path-based overload, which does the gzip sniffing and parsing
+ return load(file.toPath(), charsetName, baseUri, parser);
+ }
+
+    /**
+     * Reads the file at {@code path} and parses it to a Document using the HtmlParser. Gzip-compressed files
+     * (named with a {@code .gz} or {@code .z} suffix) are supported in addition to uncompressed files.
+     *
+     * @param path file to load
+     * @param charsetName (optional) character set of input; specify {@code null} to attempt to autodetect. A BOM in
+     *     the file will always override this setting.
+     * @param baseUri base URI of document, to resolve relative links against
+     * @return Document
+     * @throws IOException on IO error
+     */
+    public static Document load(Path path, @Nullable String charsetName, String baseUri) throws IOException {
+        final Parser htmlParser = Parser.htmlParser();
+        return load(path, charsetName, baseUri, htmlParser);
+    }
+    /**
+     * Loads and parses a file to a Document. Files that are compressed with gzip (and end in {@code .gz} or {@code .z})
+     * are supported in addition to uncompressed files.
+     *
+     * @param path file to load
+     * @param charsetName (optional) character set of input; specify {@code null} to attempt to autodetect. A BOM in
+     *     the file will always override this setting.
+     * @param baseUri base URI of document, to resolve relative links against
+     * @param parser alternate {@link Parser#xmlParser() parser} to use.
+     * @return Document
+     * @throws IOException on IO error
+     * @since 1.18.1
+     */
+    public static Document load(Path path, @Nullable String charsetName, String baseUri, Parser parser) throws IOException {
+        final SeekableByteChannel byteChannel = Files.newByteChannel(path);
+        InputStream stream = Channels.newInputStream(byteChannel);
+        try {
+            // getFileName() is null for a root path; treat that as a name with no gzip suffix
+            final Path fileName = path.getFileName();
+            final String name = fileName != null ? Normalizer.lowerCase(fileName.toString()) : "";
+            if (name.endsWith(".gz") || name.endsWith(".z")) {
+                final boolean zipped = (stream.read() == 0x1f && stream.read() == 0x8b); // gzip magic bytes
+                byteChannel.position(0); // rewind, so the consumer of the stream sees the sniffed bytes again
+                if (zipped) {
+                    stream = new GZIPInputStream(stream);
+                }
- stream = zipped ? new GZIPInputStream(new FileInputStream(file)) : new FileInputStream(file);
}
+        } catch (IOException | RuntimeException e) {
+            // the stream has not been handed to parseInputStream yet, so nothing else will release the channel
+            try {
+                byteChannel.close();
+            } catch (IOException closeFailure) {
+                e.addSuppressed(closeFailure);
+            }
+            throw e;
+        }
return parseInputStream(stream, charsetName, baseUri, parser);
}
@@ -139,16 +171,15 @@ static void crossStreams(final InputStream in, final OutputStream out) throws IO
static Document parseInputStream(@Nullable InputStream input, @Nullable String charsetName, String baseUri, Parser parser) throws IOException {
if (input == null) // empty body
return new Document(baseUri);
- input = ControllableInputStream.wrap(input, DefaultBufferSize, 0);
@Nullable Document doc = null;
// read the start of the stream and look for a BOM or meta charset
- try {
- input.mark(DefaultBufferSize);
- ByteBuffer firstBytes = readToByteBuffer(input, firstReadBufferSize - 1); // -1 because we read one more to see if completed. First read is < buffer size, so can't be invalid.
- boolean fullyRead = (input.read() == -1);
- input.reset();
+ try (InputStream wrappedInputStream = ControllableInputStream.wrap(input, DefaultBufferSize, 0)) {
+ wrappedInputStream.mark(DefaultBufferSize);
+ ByteBuffer firstBytes = readToByteBuffer(wrappedInputStream, firstReadBufferSize - 1); // -1 because we read one more to see if completed. First read is < buffer size, so can't be invalid.
+ boolean fullyRead = (wrappedInputStream.read() == -1);
+ wrappedInputStream.reset();
// look for BOM - overrides any other header or input
BomCharset bomCharset = detectCharsetFromBom(firstBytes);
@@ -189,9 +220,8 @@ else if (first instanceof Comment) {
if (comment.isXmlDeclaration())
decl = comment.asXmlDeclaration();
}
- if (decl != null) {
- if (decl.name().equalsIgnoreCase("xml"))
- foundCharset = decl.attr("encoding");
+ if (decl != null && decl.name().equalsIgnoreCase("xml")) {
+ foundCharset = decl.attr("encoding");
}
}
foundCharset = validateCharset(foundCharset);
@@ -208,8 +238,7 @@ else if (first instanceof Comment) {
if (doc == null) {
if (charsetName == null)
charsetName = defaultCharsetName;
- BufferedReader reader = new BufferedReader(new InputStreamReader(input, Charset.forName(charsetName)), DefaultBufferSize); // Android level does not allow us try-with-resources
- try {
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(wrappedInputStream, Charset.forName(charsetName)), DefaultBufferSize)) {
if (bomCharset != null && bomCharset.offset) { // creating the buffered reader ignores the input pos, so must skip here
long skipped = reader.skip(1);
Validate.isTrue(skipped == 1); // WTF if this fails.
@@ -227,14 +256,8 @@ else if (first instanceof Comment) {
doc.charset(UTF_8);
}
}
- finally {
- reader.close();
- }
}
}
- finally {
- input.close();
- }
return doc;
}
diff --git a/src/main/java/org/jsoup/helper/HttpConnection.java b/src/main/java/org/jsoup/helper/HttpConnection.java
index fc9467aeba..ef3d2024d1 100644
--- a/src/main/java/org/jsoup/helper/HttpConnection.java
+++ b/src/main/java/org/jsoup/helper/HttpConnection.java
@@ -5,6 +5,7 @@
import org.jsoup.UncheckedIOException;
import org.jsoup.UnsupportedMimeTypeException;
import org.jsoup.internal.ControllableInputStream;
+import org.jsoup.internal.Functions;
import org.jsoup.internal.SharedConstants;
import org.jsoup.internal.StringUtil;
import org.jsoup.nodes.Document;
@@ -39,6 +40,7 @@
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
+import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import java.util.zip.Inflater;
@@ -1109,13 +1111,8 @@ private static LinkedHashMap> createHeaderMap(HttpURLConnec
if (key == null || val == null)
continue; // skip http1.1 line
- if (headers.containsKey(key))
- headers.get(key).add(val);
- else {
- final ArrayList vals = new ArrayList<>();
- vals.add(val);
- headers.put(key, vals);
- }
+ final List vals = headers.computeIfAbsent(key, Functions.listFunction());
+ vals.add(val);
}
return headers;
}
diff --git a/src/main/java/org/jsoup/internal/ConstrainableInputStream.java b/src/main/java/org/jsoup/internal/ConstrainableInputStream.java
deleted file mode 100644
index 8f382ea042..0000000000
--- a/src/main/java/org/jsoup/internal/ConstrainableInputStream.java
+++ /dev/null
@@ -1,100 +0,0 @@
-package org.jsoup.internal;
-
-import org.jsoup.helper.DataUtil;
-import org.jsoup.helper.Validate;
-
-import java.io.BufferedInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.SocketTimeoutException;
-import java.nio.ByteBuffer;
-
-/**
- * A jsoup internal class (so don't use it as there is no contract API) that enables constraints on an Input Stream,
- * namely a maximum read size, and the ability to Thread.interrupt() the read.
- * @deprecated use {@link ControllableInputStream} instead (but don't use that either, because this is jsoup internal!)
- */
-@Deprecated
-public final class ConstrainableInputStream extends BufferedInputStream {
- private final boolean capped;
- private final int maxSize;
- private long startTime;
- private long timeout = 0; // optional max time of request
- private int remaining;
- private boolean interrupted;
-
- private ConstrainableInputStream(InputStream in, int bufferSize, int maxSize) {
- super(in, bufferSize);
- Validate.isTrue(maxSize >= 0);
- this.maxSize = maxSize;
- remaining = maxSize;
- capped = maxSize != 0;
- startTime = System.nanoTime();
- }
-
- /**
- * If this InputStream is not already a ConstrainableInputStream, let it be one.
- * @param in the input stream to (maybe) wrap
- * @param bufferSize the buffer size to use when reading
- * @param maxSize the maximum size to allow to be read. 0 == infinite.
- * @return a constrainable input stream
- */
- public static ConstrainableInputStream wrap(InputStream in, int bufferSize, int maxSize) {
- return in instanceof ConstrainableInputStream
- ? (ConstrainableInputStream) in
- : new ConstrainableInputStream(in, bufferSize, maxSize);
- }
-
- @Override
- public int read(byte[] b, int off, int len) throws IOException {
- if (interrupted || capped && remaining <= 0)
- return -1;
- if (Thread.currentThread().isInterrupted()) {
- // interrupted latches, because parse() may call twice
- interrupted = true;
- return -1;
- }
- if (expired())
- throw new SocketTimeoutException("Read timeout");
-
- if (capped && len > remaining)
- len = remaining; // don't read more than desired, even if available
-
- try {
- final int read = super.read(b, off, len);
- remaining -= read;
- return read;
- } catch (SocketTimeoutException e) {
- return 0;
- }
- }
-
- /**
- * Reads this inputstream to a ByteBuffer. The supplied max may be less than the inputstream's max, to support
- * reading just the first bytes.
- */
- public ByteBuffer readToByteBuffer(int max) throws IOException {
- return DataUtil.readToByteBuffer(this, max);
- }
-
- @Override
- public void reset() throws IOException {
- super.reset();
- remaining = maxSize - markpos;
- }
-
- public ConstrainableInputStream timeout(long startTimeNanos, long timeoutMillis) {
- this.startTime = startTimeNanos;
- this.timeout = timeoutMillis * 1000000;
- return this;
- }
-
- private boolean expired() {
- if (timeout == 0)
- return false;
-
- final long now = System.nanoTime();
- final long dur = now - startTime;
- return (dur > timeout);
- }
-}
diff --git a/src/main/java/org/jsoup/internal/FieldsAreNonnullByDefault.java b/src/main/java/org/jsoup/internal/FieldsAreNonnullByDefault.java
deleted file mode 100644
index 9e099b4ee5..0000000000
--- a/src/main/java/org/jsoup/internal/FieldsAreNonnullByDefault.java
+++ /dev/null
@@ -1,17 +0,0 @@
-package org.jsoup.internal;
-
-import org.jspecify.annotations.NullMarked;
-
-import java.lang.annotation.Documented;
-import java.lang.annotation.Retention;
-import java.lang.annotation.RetentionPolicy;
-
-/**
- @deprecated Previously indicated that fields types are not nullable, unless otherwise specified by @Nullable.
- */
-@Deprecated
-@Documented
-@NullMarked
-@Retention(value = RetentionPolicy.CLASS)
-public @interface FieldsAreNonnullByDefault {
-}
diff --git a/src/main/java/org/jsoup/internal/Functions.java b/src/main/java/org/jsoup/internal/Functions.java
new file mode 100644
index 0000000000..40227d8417
--- /dev/null
+++ b/src/main/java/org/jsoup/internal/Functions.java
@@ -0,0 +1,40 @@
+package org.jsoup.internal;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.IdentityHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.function.Function;
+
+/**
+ * An internal class containing functions for use with {@link Map#computeIfAbsent(Object, Function)}.
+ * <p>Each accessor hands back one shared, stateless function that ignores its key and creates a new, empty
+ * collection on every call. The shared instances are stored as raw {@code Function}s and cast to the
+ * type the caller needs.</p>
+ */
+@SuppressWarnings({"rawtypes", "unchecked"})
+public final class Functions {
+    private static final Function ListFunction = key -> new ArrayList<>();
+    private static final Function SetFunction = key -> new HashSet<>();
+    private static final Function MapFunction = key -> new HashMap<>();
+    private static final Function IdentityMapFunction = key -> new IdentityHashMap<>();
+
+    private Functions() {
+    }
+
+    /**
+     * @param <T> the key type (ignored by the function)
+     * @param <U> the list element type
+     * @return a function that maps any key to a new, empty {@link ArrayList}
+     */
+    public static <T, U> Function<T, List<U>> listFunction() {
+        return (Function<T, List<U>>) ListFunction;
+    }
+
+    /**
+     * @param <T> the key type (ignored by the function)
+     * @param <U> the set element type
+     * @return a function that maps any key to a new, empty {@link HashSet}
+     */
+    public static <T, U> Function<T, Set<U>> setFunction() {
+        return (Function<T, Set<U>>) SetFunction;
+    }
+
+    /**
+     * @param <K> the key type of the created map
+     * @param <V> the value type of the created map
+     * @param <T> the key type (ignored by the function)
+     * @return a function that maps any key to a new, empty {@link HashMap}
+     */
+    public static <K, V, T> Function<T, Map<K, V>> mapFunction() {
+        return (Function<T, Map<K, V>>) MapFunction;
+    }
+
+    /**
+     * @param <K> the key type of the created map
+     * @param <V> the value type of the created map
+     * @param <T> the key type (ignored by the function)
+     * @return a function that maps any key to a new, empty {@link IdentityHashMap}
+     */
+    public static <K, V, T> Function<T, IdentityHashMap<K, V>> identityMapFunction() {
+        return (Function<T, IdentityHashMap<K, V>>) IdentityMapFunction;
+    }
+}
diff --git a/src/main/java/org/jsoup/internal/NonnullByDefault.java b/src/main/java/org/jsoup/internal/NonnullByDefault.java
deleted file mode 100644
index cda055d4f2..0000000000
--- a/src/main/java/org/jsoup/internal/NonnullByDefault.java
+++ /dev/null
@@ -1,17 +0,0 @@
-package org.jsoup.internal;
-
-import org.jspecify.annotations.NullMarked;
-
-import java.lang.annotation.Documented;
-import java.lang.annotation.Retention;
-import java.lang.annotation.RetentionPolicy;
-
-/**
- @deprecated Previously indicated that all components (methods, returns, fields) are not nullable, unless otherwise specified by @Nullable.
- */
-@Deprecated
-@Documented
-@NullMarked
-@Retention(value = RetentionPolicy.CLASS)
-public @interface NonnullByDefault {
-}
diff --git a/src/main/java/org/jsoup/internal/ReturnsAreNonnullByDefault.java b/src/main/java/org/jsoup/internal/ReturnsAreNonnullByDefault.java
deleted file mode 100644
index d218d6cb7a..0000000000
--- a/src/main/java/org/jsoup/internal/ReturnsAreNonnullByDefault.java
+++ /dev/null
@@ -1,17 +0,0 @@
-package org.jsoup.internal;
-
-import org.jspecify.annotations.NullMarked;
-
-import java.lang.annotation.Documented;
-import java.lang.annotation.Retention;
-import java.lang.annotation.RetentionPolicy;
-
-/**
- @deprecated Previously indicated that return types are not nullable, unless otherwise specified by @Nullable.
- */
-@Deprecated
-@Documented
-@NullMarked
-@Retention(value = RetentionPolicy.RUNTIME)
-public @interface ReturnsAreNonnullByDefault {
-}
diff --git a/src/main/java/org/jsoup/internal/StringUtil.java b/src/main/java/org/jsoup/internal/StringUtil.java
index 85e104ef16..3be5f7d809 100644
--- a/src/main/java/org/jsoup/internal/StringUtil.java
+++ b/src/main/java/org/jsoup/internal/StringUtil.java
@@ -10,6 +10,8 @@
import java.util.Iterator;
import java.util.Stack;
import java.util.regex.Pattern;
+import java.util.stream.Collector;
+import java.util.stream.Collectors;
/**
A minimal String utility class. Designed for internal jsoup use only - the API and outcome may change without
@@ -375,6 +377,23 @@ public static String releaseBuilder(StringBuilder sb) {
return string;
}
+    /**
+     * Return a {@link Collector} similar to the one returned by {@link Collectors#joining(CharSequence)},
+     * but backed by jsoup's {@link StringJoiner}, which allows for more efficient garbage collection.
+     * <p>NOTE(review): when two partial results are merged (parallel streams only), the second is passed to
+     * {@code append} rather than {@code add}; whether a delimiter ends up between the two halves depends on
+     * {@code StringJoiner.append}, which is not visible here. Confirm before using with a parallel stream.</p>
+     *
+     * @param delimiter The delimiter for separating the strings.
+     * @return A {@code Collector} which concatenates CharSequence elements, separated by the specified delimiter
+     */
+    public static Collector<CharSequence, ?, String> joining(String delimiter) {
+        return Collector.of(() -> new StringJoiner(delimiter),
+            StringJoiner::add,
+            (j1, j2) -> {
+                j1.append(j2.complete());
+                return j1;
+            },
+            StringJoiner::complete);
+    }
+
private static final int MaxCachedBuilderSize = 8 * 1024;
private static final int MaxIdleBuilders = 8;
}
diff --git a/src/main/java/org/jsoup/nodes/Element.java b/src/main/java/org/jsoup/nodes/Element.java
index 41ba2e9482..9d35cc18e7 100644
--- a/src/main/java/org/jsoup/nodes/Element.java
+++ b/src/main/java/org/jsoup/nodes/Element.java
@@ -6,6 +6,7 @@
import org.jsoup.parser.ParseSettings;
import org.jsoup.parser.Parser;
import org.jsoup.parser.Tag;
+import org.jsoup.parser.TokenQueue;
import org.jsoup.select.Collector;
import org.jsoup.select.Elements;
import org.jsoup.select.Evaluator;
@@ -970,12 +971,9 @@ private String cssSelectorComponent() {
// Escape tagname, and translate HTML namespace ns:tag to CSS namespace syntax ns|tag
String tagName = escapeCssIdentifier(tagName()).replace("\\:", "|");
StringBuilder selector = StringUtil.borrowBuilder().append(tagName);
- // String classes = StringUtil.join(classNames().stream().map(TokenQueue::escapeCssIdentifier).iterator(), ".");
- // todo - replace with ^^ in 1.16.1 when we enable Android support for stream etc
- StringUtil.StringJoiner escapedClasses = new StringUtil.StringJoiner(".");
- for (String name : classNames()) escapedClasses.add(escapeCssIdentifier(name));
- String classes = escapedClasses.complete();
- if (classes.length() > 0)
+ String classes = classNames().stream().map(TokenQueue::escapeCssIdentifier)
+ .collect(StringUtil.joining("."));
+ if (!classes.isEmpty())
selector.append('.').append(classes);
if (parent() == null || parent() instanceof Document) // don't add Document to selector, as will always have a html node
diff --git a/src/main/java/org/jsoup/nodes/Range.java b/src/main/java/org/jsoup/nodes/Range.java
index 955c043a8c..ccc1db5f51 100644
--- a/src/main/java/org/jsoup/nodes/Range.java
+++ b/src/main/java/org/jsoup/nodes/Range.java
@@ -95,12 +95,6 @@ static Range of(Node node, boolean start) {
return range != null ? (Range) range : Untracked;
}
- /**
- @deprecated no-op; internal method moved out of visibility
- */
- @Deprecated
- public void track(Node node, boolean start) {}
-
@Override
public boolean equals(Object o) {
if (this == o) return true;
diff --git a/src/main/java/org/jsoup/parser/XmlTreeBuilder.java b/src/main/java/org/jsoup/parser/XmlTreeBuilder.java
index 8ef653eee3..bc4b612d49 100644
--- a/src/main/java/org/jsoup/parser/XmlTreeBuilder.java
+++ b/src/main/java/org/jsoup/parser/XmlTreeBuilder.java
@@ -131,20 +131,6 @@ void insertDoctypeFor(Token.Doctype token) {
insertLeafNode(doctypeNode);
}
- /** @deprecated unused and will be removed. */
- @Deprecated
- protected void insertNode(Node node) {
- currentElement().appendChild(node);
- onNodeInserted(node);
- }
-
- /** @deprecated unused and will be removed. */
- @Deprecated
- protected void insertNode(Node node, Token token) {
- currentElement().appendChild(node);
- onNodeInserted(node);
- }
-
/**
* If the stack contains an element with this tag's name, pop up the stack to remove the first occurrence. If not
* found, skips.
diff --git a/src/main/java/org/jsoup/safety/Safelist.java b/src/main/java/org/jsoup/safety/Safelist.java
index eb1281ba6d..629522fc32 100644
--- a/src/main/java/org/jsoup/safety/Safelist.java
+++ b/src/main/java/org/jsoup/safety/Safelist.java
@@ -6,6 +6,7 @@ Thank you to Ryan Grove (wonko.com) for the Ruby HTML cleaner http://github.com/
*/
import org.jsoup.helper.Validate;
+import org.jsoup.internal.Functions;
import org.jsoup.internal.Normalizer;
import org.jsoup.nodes.Attribute;
import org.jsoup.nodes.Attributes;
@@ -304,12 +305,8 @@ public Safelist addAttributes(String tag, String... attributes) {
Validate.notEmpty(key);
attributeSet.add(AttributeKey.valueOf(key));
}
- if (this.attributes.containsKey(tagName)) {
- Set currentSet = this.attributes.get(tagName);
- currentSet.addAll(attributeSet);
- } else {
- this.attributes.put(tagName, attributeSet);
- }
+ Set currentSet = this.attributes.computeIfAbsent(tagName, Functions.setFunction());
+ currentSet.addAll(attributeSet);
return this;
}
@@ -382,13 +379,8 @@ public Safelist addEnforcedAttribute(String tag, String attribute, String value)
AttributeKey attrKey = AttributeKey.valueOf(attribute);
AttributeValue attrVal = AttributeValue.valueOf(value);
- if (enforcedAttributes.containsKey(tagName)) {
- enforcedAttributes.get(tagName).put(attrKey, attrVal);
- } else {
- Map attrMap = new HashMap<>();
- attrMap.put(attrKey, attrVal);
- enforcedAttributes.put(tagName, attrMap);
- }
+ Map attrMap = enforcedAttributes.computeIfAbsent(tagName, Functions.mapFunction());
+ attrMap.put(attrKey, attrVal);
return this;
}
@@ -458,21 +450,9 @@ public Safelist addProtocols(String tag, String attribute, String... protocols)
TagName tagName = TagName.valueOf(tag);
AttributeKey attrKey = AttributeKey.valueOf(attribute);
- Map> attrMap;
- Set protSet;
-
- if (this.protocols.containsKey(tagName)) {
- attrMap = this.protocols.get(tagName);
- } else {
- attrMap = new HashMap<>();
- this.protocols.put(tagName, attrMap);
- }
- if (attrMap.containsKey(attrKey)) {
- protSet = attrMap.get(attrKey);
- } else {
- protSet = new HashSet<>();
- attrMap.put(attrKey, protSet);
- }
+ Map> attrMap = this.protocols.computeIfAbsent(tagName, Functions.mapFunction());
+ Set protSet = attrMap.computeIfAbsent(attrKey, Functions.setFunction());
+
for (String protocol : protocols) {
Validate.notEmpty(protocol);
Protocol prot = Protocol.valueOf(protocol);
diff --git a/src/main/java/org/jsoup/select/CombiningEvaluator.java b/src/main/java/org/jsoup/select/CombiningEvaluator.java
index 25e5eac2ee..ea442f0dc3 100644
--- a/src/main/java/org/jsoup/select/CombiningEvaluator.java
+++ b/src/main/java/org/jsoup/select/CombiningEvaluator.java
@@ -7,15 +7,15 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
-import java.util.Collections;
import java.util.Comparator;
+import java.util.List;
/**
* Base combining (and, or) evaluator.
*/
public abstract class CombiningEvaluator extends Evaluator {
final ArrayList evaluators; // maintain original order so that #toString() is sensible
- final ArrayList sortedEvaluators; // cost ascending order
+ final List sortedEvaluators; // cost ascending order
int num = 0;
int cost = 0;
@@ -62,12 +62,9 @@ void updateEvaluators() {
}
sortedEvaluators.clear();
sortedEvaluators.addAll(evaluators);
- Collections.sort(sortedEvaluators, costComparator);
+ sortedEvaluators.sort(Comparator.comparingInt(Evaluator::cost));
}
- private static final Comparator costComparator = (o1, o2) -> o1.cost() - o2.cost();
- // ^ comparingInt, sortedEvaluators.sort not available in targeted version
-
public static final class And extends CombiningEvaluator {
And(Collection evaluators) {
super(evaluators);
diff --git a/src/main/java/org/jsoup/select/StructuralEvaluator.java b/src/main/java/org/jsoup/select/StructuralEvaluator.java
index ce5051fb85..ca7beacb15 100644
--- a/src/main/java/org/jsoup/select/StructuralEvaluator.java
+++ b/src/main/java/org/jsoup/select/StructuralEvaluator.java
@@ -1,11 +1,13 @@
package org.jsoup.select;
+import org.jsoup.internal.Functions;
import org.jsoup.internal.StringUtil;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.NodeIterator;
import java.util.ArrayList;
import java.util.IdentityHashMap;
+import java.util.Map;
/**
* Base structural evaluator.
@@ -23,19 +25,9 @@ public StructuralEvaluator(Evaluator evaluator) {
threadMemo = ThreadLocal.withInitial(IdentityHashMap::new);
+    /**
+     Evaluates the wrapped evaluator for {@code element} under {@code root}, remembering the answer in the
+     current thread's memo so that a repeated query for the same root and element (compared by identity) is
+     answered from the memo instead of being re-evaluated.
+     @param root the root of the match context
+     @param element the element to test
+     @return the (possibly memoized) result of {@code evaluator.matches(root, element)}
+     */
boolean memoMatches(final Element root, final Element element) {
- // not using computeIfAbsent, as the lambda impl requires a new Supplier closure object on every hit: tons of GC
- IdentityHashMap> rootMemo = threadMemo.get();
- IdentityHashMap memo = rootMemo.get(root);
- if (memo == null) {
- memo = new IdentityHashMap<>();
- rootMemo.put(root, memo);
- }
- Boolean matches = memo.get(element);
- if (matches == null) {
- matches = evaluator.matches(root, element);
- memo.put(element, matches);
- }
- return matches;
+        // outer map: root -> per-root memo; inner map: element -> match result
+        Map<Element, IdentityHashMap<Element, Boolean>> rootMemo = threadMemo.get();
+        Map<Element, Boolean> memo = rootMemo.computeIfAbsent(root, Functions.identityMapFunction());
+        return memo.computeIfAbsent(element, key -> evaluator.matches(root, key));
}
@Override protected void reset() {
@@ -163,34 +155,6 @@ public String toString() {
}
}
- /**
- @deprecated replaced by {@link ImmediateParentRun}
- */
- @Deprecated
- static class ImmediateParent extends StructuralEvaluator {
- public ImmediateParent(Evaluator evaluator) {
- super(evaluator);
- }
-
- @Override
- public boolean matches(Element root, Element element) {
- if (root == element)
- return false;
-
- Element parent = element.parent();
- return parent != null && memoMatches(root, parent);
- }
-
- @Override protected int cost() {
- return 1 + evaluator.cost();
- }
-
- @Override
- public String toString() {
- return String.format("%s > ", evaluator);
- }
- }
-
/**
Holds a list of evaluators for one > two > three immediate parent matches, and the final direct evaluator under
test. To match, these are effectively ANDed together, starting from the last, matching up to the first.
diff --git a/src/test/java/org/jsoup/helper/DataUtilTest.java b/src/test/java/org/jsoup/helper/DataUtilTest.java
index 10074d4ca9..61627aac20 100644
--- a/src/test/java/org/jsoup/helper/DataUtilTest.java
+++ b/src/test/java/org/jsoup/helper/DataUtilTest.java
@@ -11,8 +11,10 @@
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
+import java.nio.file.Path;
import static org.jsoup.integration.ParseTest.getFile;
+import static org.jsoup.integration.ParseTest.getPath;
import static org.junit.jupiter.api.Assertions.*;
public class DataUtilTest {
@@ -207,13 +209,21 @@ public void supportsXmlCharsetDeclaration() throws IOException {
@Test
- public void lLoadsGzipFile() throws IOException {
+ public void loadsGzipFile() throws IOException {
+ // parses a .gz fixture through the File-based API and checks the title and first paragraph text
File in = getFile("/htmltests/gzip.html.gz");
Document doc = Jsoup.parse(in, null);
assertEquals("Gzip test", doc.title());
assertEquals("This is a gzipped HTML file.", doc.selectFirst("p").text());
}
+    // parses the .gz fixture through the Path-based API and checks the title and first paragraph text
+    @Test
+    public void loadsGzipPath() throws IOException {
+        final Path gzipped = getPath("/htmltests/gzip.html.gz");
+        final Document parsed = Jsoup.parse(gzipped, null);
+        final String title = parsed.title();
+        assertEquals("Gzip test", title);
+        final String firstParagraph = parsed.selectFirst("p").text();
+        assertEquals("This is a gzipped HTML file.", firstParagraph);
+    }
+
@Test
public void loadsZGzipFile() throws IOException {
// compressed on win, with z suffix
@@ -223,6 +233,15 @@ public void loadsZGzipFile() throws IOException {
assertEquals("This is a gzipped HTML file.", doc.selectFirst("p").text());
}
+    // same check as for the .gz fixture, but the file carries a .z suffix (compressed on win)
+    @Test
+    public void loadsZGzipPath() throws IOException {
+        final Path zSuffixed = getPath("/htmltests/gzip.html.z");
+        final Document parsed = Jsoup.parse(zSuffixed, null);
+        final String title = parsed.title();
+        assertEquals("Gzip test", title);
+        final String firstParagraph = parsed.selectFirst("p").text();
+        assertEquals("This is a gzipped HTML file.", firstParagraph);
+    }
+
@Test
public void handlesFakeGzipFile() throws IOException {
File in = getFile("/htmltests/fake-gzip.html.gz");
@@ -231,6 +250,14 @@ public void handlesFakeGzipFile() throws IOException {
assertEquals("And should still be readable.", doc.selectFirst("p").text());
}
+    // the fixture has a .gz suffix; the expected title and text say it is not actually gzipped
+    @Test
+    public void handlesFakeGzipPath() throws IOException {
+        final Path fakeGzip = getPath("/htmltests/fake-gzip.html.gz");
+        final Document parsed = Jsoup.parse(fakeGzip, null);
+        final String title = parsed.title();
+        assertEquals("This is not gzipped", title);
+        final String firstParagraph = parsed.selectFirst("p").text();
+        assertEquals("And should still be readable.", firstParagraph);
+    }
+
// an input stream to give a range of output sizes, that changes on each read
static class VaryingReadInputStream extends InputStream {
final InputStream in;
diff --git a/src/test/java/org/jsoup/integration/ParseTest.java b/src/test/java/org/jsoup/integration/ParseTest.java
index 0c5cb2b15c..d84c103497 100644
--- a/src/test/java/org/jsoup/integration/ParseTest.java
+++ b/src/test/java/org/jsoup/integration/ParseTest.java
@@ -15,6 +15,8 @@
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.zip.GZIPInputStream;
import static org.junit.jupiter.api.Assertions.*;
@@ -133,6 +135,15 @@ public static File getFile(String resourceName) {
}
}
+    /**
+     Resolves a classpath resource (looked up relative to {@code ParseTest}) to a {@link Path}.
+     @param resourceName name of the resource to look up
+     @return the resource's path, or the path {@code /404} if the resource was not found
+     @throws IllegalStateException if the resource URL cannot be converted to a URI
+     */
+    public static Path getPath(String resourceName) {
+        final URL located = ParseTest.class.getResource(resourceName);
+        if (located == null) {
+            return Paths.get("/404");
+        }
+        try {
+            return Paths.get(located.toURI());
+        } catch (URISyntaxException e) {
+            throw new IllegalStateException(e);
+        }
+    }
+
public static InputStream inputStreamFrom(String s) {
return new ByteArrayInputStream(s.getBytes(StandardCharsets.UTF_8));
}
diff --git a/src/test/java/org/jsoup/nodes/BuildEntities.java b/src/test/java/org/jsoup/nodes/BuildEntities.java
index 423c26805a..c5eb554ae4 100644
--- a/src/test/java/org/jsoup/nodes/BuildEntities.java
+++ b/src/test/java/org/jsoup/nodes/BuildEntities.java
@@ -105,12 +105,6 @@ private static String d(int d) {
return Integer.toString(d, Entities.codepointRadix);
}
- private static class ByName implements Comparator {
- public int compare(CharacterRef o1, CharacterRef o2) {
- return o1.name.compareTo(o2.name);
- }
- }
-
private static class ByCode implements Comparator {
public int compare(CharacterRef o1, CharacterRef o2) {
int[] c1 = o1.codepoints;
@@ -131,6 +125,6 @@ public int compare(CharacterRef o1, CharacterRef o2) {
}
}
- private static ByName byName = new ByName();
- private static ByCode byCode = new ByCode();
+    // orders character references by their name
+    private static final Comparator<CharacterRef> byName = Comparator.comparing(ref -> ref.name);
+    private static final ByCode byCode = new ByCode();
}