Skip to content

Commit

Permalink
Deprecate Document#updateMetaCharsetElement
Browse files Browse the repository at this point in the history
The setting had no effect: calls to charset(charset) always enabled it.

Users can still call OutputSettings.charset if desired to avoid setting the meta element.
  • Loading branch information
jhy committed Dec 16, 2024
1 parent 8e9f869 commit dd2b7e3
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 120 deletions.
3 changes: 3 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
* Removed previously deprecated class: `org.jsoup.UncheckedIOException` (replace with `java.io.UncheckedIOException`);
method `Element Element#forEach(Consumer)` to
`void Element#forEach(Consumer)`. [2246](https://github.com/jhy/jsoup/pull/2246)
* Deprecated the methods `Document#updateMetaCharsetElement(boolean)` and `Document#updateMetaCharsetElement()`, as the
setting had no effect. When `Document#charset(Charset)` is called, the document's meta charset or XML encoding
instruction is always set.

### Improvements

Expand Down
168 changes: 61 additions & 107 deletions src/main/java/org/jsoup/nodes/Document.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ public class Document extends Element {
private Parser parser; // the parser used to parse this document
private QuirksMode quirksMode = QuirksMode.noQuirks;
private final String location;
private boolean updateMetaCharset = false;

/**
Create a new, empty Document, in the specified namespace.
Expand Down Expand Up @@ -241,76 +240,56 @@ public Element text(String text) {
public String nodeName() {
// A Document always reports this fixed node name.
return "#document";
}

/**
* Sets the charset used in this document. This method is equivalent
* to {@link OutputSettings#charset(java.nio.charset.Charset)
* OutputSettings.charset(Charset)} but in addition it updates the
* charset / encoding element within the document.
*
* <p>This enables
* {@link #updateMetaCharsetElement(boolean) meta charset update}.</p>
*
* <p>If there's no element with charset / encoding information yet it will
* be created. Obsolete charset / encoding definitions are removed!</p>
*
* <p><b>Elements used:</b></p>
*
* <ul>
* <li><b>Html:</b> <i>&lt;meta charset="CHARSET"&gt;</i></li>
* <li><b>Xml:</b> <i>&lt;?xml version="1.0" encoding="CHARSET"&gt;</i></li>
* </ul>
*
* @param charset Charset
*
* @see #updateMetaCharsetElement(boolean)
* @see OutputSettings#charset(java.nio.charset.Charset)
Set the output character set of this Document. This method is equivalent to
{@link OutputSettings#charset(java.nio.charset.Charset) OutputSettings.charset(Charset)}, but additionally adds or
updates the charset / encoding element within the Document.
<p>If there's no existing element with charset / encoding information yet, one will
be created. Obsolete charset / encoding definitions are removed.</p>
<p><b>Elements used:</b></p>
<ul>
<li><b>HTML:</b> <i>&lt;meta charset="CHARSET"&gt;</i></li>
<li><b>XML:</b> <i>&lt;?xml version="1.0" encoding="CHARSET"&gt;</i></li>
</ul>
@param charset Charset
@return this Document, for chaining
@see OutputSettings#charset(java.nio.charset.Charset)
*/
public void charset(Charset charset) {
updateMetaCharsetElement(true);
public Document charset(Charset charset) {
outputSettings.charset(charset);
ensureMetaCharsetElement();
return this;
}

/**
Get the output character set of this Document. This method is equivalent to {@link OutputSettings#charset()}.
@return the current Charset
@see OutputSettings#charset()
*/
public Charset charset() {
return outputSettings.charset();
}

/**
* Sets whether the element with charset information in this document is
* updated on changes through {@link #charset(java.nio.charset.Charset)
* Document.charset(Charset)} or not.
*
* <p>If set to <tt>false</tt> <i>(default)</i> there are no elements
* modified.</p>
*
* @param update If <tt>true</tt> the element updated on charset
* changes, <tt>false</tt> if not
*
* @see #charset(java.nio.charset.Charset)
@deprecated this setting has no effect; the meta charset element is always updated when
{@link Document#charset(Charset)} is called. This method will be removed in jsoup 1.20.1.
*/
public void updateMetaCharsetElement(boolean update) {
this.updateMetaCharset = update;
}

@Deprecated(forRemoval = true, since = "1.19.1")
public void updateMetaCharsetElement(boolean noop) {}

/**
* Returns whether the element with charset information in this document is
* updated on changes through {@link #charset(java.nio.charset.Charset)
* Document.charset(Charset)} or not.
*
* @return Returns <tt>true</tt> if the element is updated on charset
* changes, <tt>false</tt> if not
@deprecated this setting has no effect; the meta charset element is always updated when
{@link Document#charset(Charset)} is called. This method will be removed in jsoup 1.20.1.
*/
@Deprecated(forRemoval = true, since = "1.19.1")
public boolean updateMetaCharsetElement() {
return updateMetaCharset;
return true;
}

@Override
Expand All @@ -329,61 +308,36 @@ public Document shallowClone() {
return clone;
}

/**
* Ensures a meta charset (html) or xml declaration (xml) with the current
* encoding used. This only applies with
* {@link #updateMetaCharsetElement(boolean) updateMetaCharset} set to
* <tt>true</tt>, otherwise this method does nothing.
*
* <ul>
* <li>An existing element gets updated with the current charset</li>
* <li>If there's no element yet it will be inserted</li>
* <li>Obsolete elements are removed</li>
* </ul>
*
* <p><b>Elements used:</b></p>
*
* <ul>
* <li><b>Html:</b> <i>&lt;meta charset="CHARSET"&gt;</i></li>
* <li><b>Xml:</b> <i>&lt;?xml version="1.0" encoding="CHARSET"&gt;</i></li>
* </ul>
*/

private void ensureMetaCharsetElement() {
if (updateMetaCharset) {
OutputSettings.Syntax syntax = outputSettings().syntax();

if (syntax == OutputSettings.Syntax.html) {
Element metaCharset = selectFirst("meta[charset]");
if (metaCharset != null) {
metaCharset.attr("charset", charset().displayName());
} else {
head().appendElement("meta").attr("charset", charset().displayName());
}
select("meta[name=charset]").remove(); // Remove obsolete elements
} else if (syntax == OutputSettings.Syntax.xml) {
Node node = ensureChildNodes().get(0);
if (node instanceof XmlDeclaration) {
XmlDeclaration decl = (XmlDeclaration) node;
if (decl.name().equals("xml")) {
decl.attr("encoding", charset().displayName());
if (decl.hasAttr("version"))
decl.attr("version", "1.0");
} else {
decl = new XmlDeclaration("xml", false);
decl.attr("version", "1.0");
decl.attr("encoding", charset().displayName());
prependChild(decl);
}
} else {
XmlDeclaration decl = new XmlDeclaration("xml", false);
decl.attr("version", "1.0");
decl.attr("encoding", charset().displayName());
prependChild(decl);
}
OutputSettings.Syntax syntax = outputSettings().syntax();

if (syntax == OutputSettings.Syntax.html) {
Element metaCharset = selectFirst("meta[charset]");
if (metaCharset != null) {
metaCharset.attr("charset", charset().displayName());
} else {
head().appendElement("meta").attr("charset", charset().displayName());
}
select("meta[name=charset]").remove(); // Remove obsolete elements
} else if (syntax == OutputSettings.Syntax.xml) {
XmlDeclaration decl = ensureXmlDecl();
decl.attr("version", "1.0");
decl.attr("encoding", charset().displayName());
}
}


/**
 Get the XML declaration at the start of this Document, creating one if needed. If the first child node is an
 {@code XmlDeclaration} named {@code xml}, it is returned as-is. Otherwise (including when the Document has no child
 nodes at all) a new, attribute-less {@code xml} declaration is prepended and returned.
 @return the existing or newly prepended XML declaration; the caller sets its attributes
 */
private XmlDeclaration ensureXmlDecl() {
    // Only inspect the first child when one exists: get(0) on an empty child list would throw
    // IndexOutOfBoundsException instead of falling through to create the declaration.
    if (!ensureChildNodes().isEmpty()) {
        Node node = ensureChildNodes().get(0);
        if (node instanceof XmlDeclaration) {
            XmlDeclaration decl = (XmlDeclaration) node;
            if (decl.name().equals("xml")) return decl;
        }
    }
    XmlDeclaration decl = new XmlDeclaration("xml", false);
    prependChild(decl);
    return decl;
}


/**
* A Document's output settings control the form of the text() and html() methods.
Expand Down
13 changes: 0 additions & 13 deletions src/test/java/org/jsoup/nodes/DocumentTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,6 @@ public class DocumentTest {
@Test
public void testMetaCharsetUpdateUtf8() {
final Document doc = createHtmlDocument("changeThis");
doc.updateMetaCharsetElement(true);
doc.charset(Charset.forName(charsetUtf8));

final String htmlCharsetUTF8 = "<html>\n" +
Expand All @@ -247,7 +246,6 @@ public void testMetaCharsetUpdateUtf8() {
@Test
public void testMetaCharsetUpdateIso8859() {
final Document doc = createHtmlDocument("changeThis");
doc.updateMetaCharsetElement(true);
doc.charset(Charset.forName(charsetIso8859));

final String htmlCharsetISO = "<html>\n" +
Expand All @@ -267,7 +265,6 @@ public void testMetaCharsetUpdateIso8859() {
@Test
public void testMetaCharsetUpdateNoCharset() {
final Document docNoCharset = Document.createShell("");
docNoCharset.updateMetaCharsetElement(true);
docNoCharset.charset(Charset.forName(charsetUtf8));

assertEquals(charsetUtf8, docNoCharset.select("meta[charset]").first().attr("charset"));
Expand Down Expand Up @@ -328,7 +325,6 @@ public void testMetaCharsetUpdateEnabledAfterCharsetChange() {
@Test
public void testMetaCharsetUpdateCleanup() {
final Document doc = createHtmlDocument("dontTouch");
doc.updateMetaCharsetElement(true);
doc.charset(Charset.forName(charsetUtf8));

final String htmlCharsetUTF8 = "<html>\n" +
Expand All @@ -344,7 +340,6 @@ public void testMetaCharsetUpdateCleanup() {
@Test
public void testMetaCharsetUpdateXmlUtf8() {
final Document doc = createXmlDocument("1.0", "changeThis", true);
doc.updateMetaCharsetElement(true);
doc.charset(Charset.forName(charsetUtf8));

final String xmlCharsetUTF8 = "<?xml version=\"1.0\" encoding=\"" + charsetUtf8 + "\"?>\n" +
Expand All @@ -362,7 +357,6 @@ public void testMetaCharsetUpdateXmlUtf8() {
@Test
public void testMetaCharsetUpdateXmlIso8859() {
final Document doc = createXmlDocument("1.0", "changeThis", true);
doc.updateMetaCharsetElement(true);
doc.charset(Charset.forName(charsetIso8859));

final String xmlCharsetISO = "<?xml version=\"1.0\" encoding=\"" + charsetIso8859 + "\"?>\n" +
Expand All @@ -380,7 +374,6 @@ public void testMetaCharsetUpdateXmlIso8859() {
@Test
public void testMetaCharsetUpdateXmlNoCharset() {
final Document doc = createXmlDocument("1.0", "none", false);
doc.updateMetaCharsetElement(true);
doc.charset(Charset.forName(charsetUtf8));

final String xmlCharsetUTF8 = "<?xml version=\"1.0\" encoding=\"" + charsetUtf8 + "\"?>\n" +
Expand Down Expand Up @@ -418,12 +411,6 @@ public void testMetaCharsetUpdateXmlDisabledNoChanges() {
assertEquals("dontTouch", selectedNode.attr("version"));
}

@Test
public void testMetaCharsetUpdatedDisabledPerDefault() {
final Document doc = createHtmlDocument("none");
assertFalse(doc.updateMetaCharsetElement());
}

private Document createHtmlDocument(String charset) {
final Document doc = Document.createShell("");
doc.head().appendElement("meta").attr("charset", charset);
Expand Down

0 comments on commit dd2b7e3

Please sign in to comment.