diff --git a/README.md b/README.md index 0d74b201..4f35cb25 100644 --- a/README.md +++ b/README.md @@ -5,15 +5,20 @@ A library for performing fast, configurable cleansing of HTML coming from untrus Another way of saying that could be: It's an API that helps you make sure that clients don't supply malicious cargo code in the HTML they supply for their profile, comments, etc., that get persisted on the server. The term "malicious code" in regards to web applications usually mean "JavaScript." Mostly, Cascading Stylesheets are only considered malicious -when they invoke the JavaScript. However, there are many situations where "normal" HTML and CSS can be used in a malicious manner. +when they invoke JavaScript. However, there are many situations where "normal" HTML and CSS can be used in a malicious manner. -How to Use ----------- +More details on antisamy are available at: https://www.owasp.org/index.php/Category:OWASP_AntiSamy_Project. Particularly at: https://www.owasp.org/index.php/Category:OWASP_AntiSamy_Project#tab=How_do_I_get_started_3F. + +There is also a legacy developers guide at: https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/owaspantisamy/Developer%20Guide.pdf (not sure how long that will remain accessible). + +How to Import +------------- First, add the dependency from Maven: ```xml org.owasp.antisamy - antisamy + antisamy + LATEST_VERSION ``` diff --git a/pom.xml b/pom.xml index 6ec4b89e..24124847 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.owasp.antisamy antisamy jar - 1.5.7 + 1.5.8 @@ -35,11 +35,16 @@ - + scm:git:git@github.com:nahsra/antisamy.git scm:git:git@github.com:nahsra/antisamy.git scm:git:git@github.com:nahsra/antisamy.git - + antisamy-1.5.8 + + + + UTF-8 + @@ -50,55 +55,92 @@ org.apache.xmlgraphics batik-css - 1.9.1 + 1.11 + + + + commons-logging + commons-logging + + - + net.sourceforge.nekohtml nekohtml 1.9.22 + + + + xerces + xercesImpl + + - - junit - junit - jar - test - 4.12 - - - commons-codec - commons-codec - 1.10 - test - - - commons-io - commons-io - 2.2 - test - org.apache.httpcomponents httpclient - 4.3.6 + 4.5.8 + + + + commons-codec + commons-codec + + - org.eclipse.jetty - jetty-server - 7.6.14.v20131031 - test - + xerces + xercesImpl + 2.12.0 + + + commons-codec + commons-codec + 1.12 + + + - org.eclipse.jetty - jetty-servlet - 7.6.14.v20131031 + junit + junit + 4.12 test + + + + + + org.apache.maven.plugins + maven-dependency-plugin + 3.1.1 + + + + + org.apache.maven.plugins + maven-clean-plugin + 3.1.0 + + + org.apache.maven.plugins + maven-compiler-plugin + 3.8.0 + + 1.7 + 1.7 + 1.7 + 1.7 + -Xlint:unchecked + + org.apache.maven.plugins maven-jar-plugin + 3.1.0 @@ -110,39 +152,94 @@ - + + org.apache.maven.plugins + maven-install-plugin + 2.5.2 + + org.apache.maven.plugins - maven-gpg-plugin + maven-javadoc-plugin + 3.0.1 - sign-artifacts - verify - - sign - + attach-javadocs + package + jar - + - maven-javadoc-plugin + org.apache.maven.plugins + maven-site-plugin + 3.7.1 + + + org.apache.maven.plugins + maven-source-plugin + 3.0.1 - attach-javadocs + attach-sources package - jar + jar-no-fork - + - maven-source-plugin - - - attach-sources - package - jar-no-fork - - - + org.apache.maven.plugins + maven-surefire-plugin + 2.22.1 + + + com.github.spotbugs + spotbugs-maven-plugin + 3.1.11 + + + + com.github.spotbugs + spotbugs + 3.1.12 + + + + + + + org.codehaus.mojo + versions-maven-plugin + 2.5 + + + + dependency-updates-report + plugin-updates-report + + + + + + org.apache.maven.plugins + maven-project-info-reports-plugin + 3.0.0 + + + + dependency-convergence + + + + + false + + + + com.github.spotbugs + spotbugs-maven-plugin + + + diff --git a/src/main/java/org/owasp/validator/css/CssHandler.java b/src/main/java/org/owasp/validator/css/CssHandler.java index 4a728486..f36dc3bb 100644 --- a/src/main/java/org/owasp/validator/css/CssHandler.java +++ b/src/main/java/org/owasp/validator/css/CssHandler.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2011, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2019, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * @@ -58,7 +58,6 @@ * * @see javax.swing.text.html.StyleSheet * @author Jason Li - * */ public class CssHandler implements DocumentHandler { @@ -78,20 +77,19 @@ public class CssHandler implements DocumentHandler { private final InternalPolicy policy; /** - * The encaspulated results including the error messages + * The error messages */ -// private final CleanResults results; private final Collection errorMessages; /** - * The error message bundled to pull from. + * The error message bundle to pull from. */ private ResourceBundle messages; /** * A queue of imported stylesheets; used to track imported stylesheets */ - private final LinkedList importedStyleSheets; + private final LinkedList importedStyleSheets; /** * The tag currently being examined (if any); used for inline stylesheet @@ -119,8 +117,12 @@ public class CssHandler implements DocumentHandler { * the policy to use * @param embeddedStyleSheets * the queue of stylesheets imported + * @param errorMessages + * the List of error messages to use if there are errors + * @param messages + * the error message bundle to pull from */ - public CssHandler(Policy policy, LinkedList embeddedStyleSheets, + public CssHandler(Policy policy, LinkedList embeddedStyleSheets, List errorMessages, ResourceBundle messages) { this(policy, embeddedStyleSheets, errorMessages, null, messages); } @@ -133,10 +135,14 @@ public CssHandler(Policy policy, LinkedList embeddedStyleSheets, * the policy to use * @param embeddedStyleSheets * the queue of stylesheets imported + * @param errorMessages + * the List of error messages to use if there are errors * @param tagName * the associated tag name with this inline style + * @param messages + * the error message bundle to pull from */ - public CssHandler(Policy policy, LinkedList embeddedStyleSheets, + public CssHandler(Policy policy, LinkedList embeddedStyleSheets, List errorMessages, String tagName, ResourceBundle messages) { this.policy = (InternalPolicy) policy; this.errorMessages = errorMessages; @@ -150,7 +156,7 @@ public CssHandler(Policy policy, LinkedList embeddedStyleSheets, /** * Returns the cleaned stylesheet. * - * @return the cleaned styesheet + * @return the cleaned stylesheet. */ public String getCleanStylesheet() { // Always ensure results contain most recent generation of stylesheet @@ -161,8 +167,8 @@ public String getCleanStylesheet() { * Returns the error messages generated during parsing. * @return the error messages generated during parsing */ - public Collection getErrorMessages() { - return new ArrayList(errorMessages); + public Collection getErrorMessages() { + return new ArrayList(errorMessages); } /* @@ -202,7 +208,6 @@ public void ignorableAtRule(String atRule) throws CSSException { HTMLEntityEncoder.htmlEntityEncode(atRule) })); } - } /* @@ -228,7 +233,7 @@ public void importStyle(String uri, SACMediaList media, errorMessages.add(ErrorMessageUtil.getMessage( messages, ErrorMessageUtil.ERROR_CSS_IMPORT_URL_INVALID, - new Object[] { HTMLEntityEncoder.htmlEntityEncode(uri) })); + new Object[] {})); return; } diff --git a/src/main/java/org/owasp/validator/css/CssScanner.java b/src/main/java/org/owasp/validator/css/CssScanner.java index ab9148f6..61680a32 100644 --- a/src/main/java/org/owasp/validator/css/CssScanner.java +++ b/src/main/java/org/owasp/validator/css/CssScanner.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2011, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2019, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * @@ -30,7 +30,11 @@ import java.io.IOException; import java.io.StringReader; -import java.util.*; +import java.net.URI; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import java.util.ResourceBundle; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -38,7 +42,6 @@ import org.apache.batik.css.parser.Parser; import org.owasp.validator.html.CleanResults; import org.owasp.validator.html.InternalPolicy; -import org.owasp.validator.html.Policy; import org.owasp.validator.html.ScanException; import org.w3c.css.sac.InputSource; @@ -80,6 +83,8 @@ public class CssScanner { * * @param policy * the policy to follow when scanning + * @param messages + * the error message bundle to pull from */ public CssScanner(InternalPolicy policy, ResourceBundle messages) { this.policy = policy; @@ -102,60 +107,56 @@ public CssScanner(InternalPolicy policy, ResourceBundle messages) { * @throws ScanException * if an error occurs during scanning */ - public CleanResults scanStyleSheet(String taintedCss, int sizeLimit) - throws ScanException { + public CleanResults scanStyleSheet(String taintedCss, int sizeLimit) throws ScanException { long startOfScan = System.currentTimeMillis(); List errorMessages = new ArrayList(); - /* Check to see if the text starts with (\s)*(\s)*. - */ + /* Check to see if the text starts with (\s)*(\s)*. + */ - Matcher m = p.matcher(taintedCss); - - boolean isCdata = m.matches(); - - if ( isCdata ) { - taintedCss = m.group(1); - } - - // Create a queue of all style sheets that need to be validated to - // account for any sheets that may be imported by the current CSS - LinkedList stylesheets = new LinkedList(); - - CssHandler handler = new CssHandler(policy, stylesheets, errorMessages, messages); - - // parse the stylesheet - parser.setDocumentHandler(handler); - - try { - // parse the style declaration - // note this does not count against the size limit because it - // should already have been counted by the caller since it was - // embedded in the HTML - parser - .parseStyleSheet(new InputSource(new StringReader( - taintedCss))); - } catch (IOException ioe) { - throw new ScanException(ioe); - - /* - * ParseExceptions, from batik, is unfortunately a RuntimeException. - */ - } catch (ParseException pe) { - throw new ScanException(pe); - } + Matcher m = p.matcher(taintedCss); + boolean isCdata = m.matches(); - parseImportedStylesheets(stylesheets, handler, errorMessages, sizeLimit); + if ( isCdata ) { + taintedCss = m.group(1); + } - String cleaned = handler.getCleanStylesheet(); - - if ( isCdata && !policy.isUseXhtml()) { - cleaned = ""; - } - - return new CleanResults(startOfScan, cleaned, null, errorMessages); + // Create a queue of all style sheets that need to be validated to + // account for any sheets that may be imported by the current CSS + LinkedList stylesheets = new LinkedList(); + + CssHandler handler = new CssHandler(policy, stylesheets, errorMessages, messages); + + // parse the stylesheet + parser.setDocumentHandler(handler); + + try { + // parse the style declaration + // note this does not count against the size limit because it + // should already have been counted by the caller since it was + // embedded in the HTML + parser.parseStyleSheet(new InputSource(new StringReader(taintedCss))); + } catch (IOException ioe) { + throw new ScanException(ioe); + + /* + * ParseExceptions, from batik, is unfortunately a RuntimeException. + */ + } catch (ParseException pe) { + throw new ScanException(pe); + } + + parseImportedStylesheets(stylesheets, handler, errorMessages, sizeLimit); + + String cleaned = handler.getCleanStylesheet(); + + if ( isCdata && !policy.isUseXhtml()) { + cleaned = ""; + } + + return new CleanResults(startOfScan, cleaned, null, errorMessages); } /** @@ -181,32 +182,31 @@ public CleanResults scanStyleSheet(String taintedCss, int sizeLimit) public CleanResults scanInlineStyle(String taintedCss, String tagName, int sizeLimit) throws ScanException { - long startOfScan = System.currentTimeMillis(); + long startOfScan = System.currentTimeMillis(); - List errorMessages = new ArrayList(); + List errorMessages = new ArrayList(); - // Create a queue of all style sheets that need to be validated to - // account for any sheets that may be imported by the current CSS - LinkedList stylesheets = new LinkedList(); + // Create a queue of all style sheets that need to be validated to + // account for any sheets that may be imported by the current CSS + LinkedList stylesheets = new LinkedList(); - CssHandler handler = new CssHandler(policy, stylesheets, errorMessages, - tagName, messages); + CssHandler handler = new CssHandler(policy, stylesheets, errorMessages, tagName, messages); - parser.setDocumentHandler(handler); + parser.setDocumentHandler(handler); - try { - // parse the inline style declaration - // note this does not count against the size limit because it - // should already have been counted by the caller since it was - // embedded in the HTML - parser.parseStyleDeclaration(taintedCss); - } catch (IOException ioe) { - throw new ScanException(ioe); - } + try { + // parse the inline style declaration + // note this does not count against the size limit because it + // should already have been counted by the caller since it was + // embedded in the HTML + parser.parseStyleDeclaration(taintedCss); + } catch (IOException ioe) { + throw new ScanException(ioe); + } - parseImportedStylesheets(stylesheets, handler, errorMessages, sizeLimit); + parseImportedStylesheets(stylesheets, handler, errorMessages, sizeLimit); - return new CleanResults(startOfScan, handler.getCleanStylesheet(), null, errorMessages); + return new CleanResults(startOfScan, handler.getCleanStylesheet(), null, errorMessages); } /** @@ -226,7 +226,7 @@ public CleanResults scanInlineStyle(String taintedCss, String tagName, * @throws ScanException * if an error occurs during scanning */ - protected void parseImportedStylesheets(LinkedList stylesheets, CssHandler handler, + protected void parseImportedStylesheets(LinkedList stylesheets, CssHandler handler, List errorMessages, int sizeLimit) throws ScanException { // Implemented in ExternalCssScanner.java } diff --git a/src/main/java/org/owasp/validator/css/CssValidator.java b/src/main/java/org/owasp/validator/css/CssValidator.java index 052e89ed..8c92a500 100644 --- a/src/main/java/org/owasp/validator/css/CssValidator.java +++ b/src/main/java/org/owasp/validator/css/CssValidator.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2011, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2019, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * @@ -50,11 +50,10 @@ import org.w3c.css.sac.SimpleSelector; /** - * Encapsulates all the neceesary operations for validating individual eleements + * Encapsulates all the necessary operations for validating individual elements * of a stylesheet (namely: selectors, conditions and properties). * * @author Jason Li - * */ public class CssValidator { @@ -117,10 +116,9 @@ public boolean isValidProperty(String name, LexicalUnit lu) { * the name of the selector * @param selector * the object representation of the selector - * @param results - * the CleanResults object to add any error - * messages to * @return true if this selector name is valid; false otherwise + * @throws ScanException When there is a problem encountered + * while scanning this selector */ public boolean isValidSelector(String selectorName, Selector selector) throws ScanException { @@ -139,21 +137,21 @@ public boolean isValidSelector(String selectorName, Selector selector) DescendantSelector descSelector = (DescendantSelector) selector; return isValidSelector(selectorName, descSelector .getSimpleSelector()) - & isValidSelector(selectorName, descSelector + && isValidSelector(selectorName, descSelector .getAncestorSelector()); case Selector.SAC_CONDITIONAL_SELECTOR: // this is a compound selector - decompose into simple selectors ConditionalSelector condSelector = (ConditionalSelector) selector; return isValidSelector(selectorName, condSelector .getSimpleSelector()) - & isValidCondition(selectorName, condSelector + && isValidCondition(selectorName, condSelector .getCondition()); case Selector.SAC_DIRECT_ADJACENT_SELECTOR: // this is a compound selector - decompose into simple selectors SiblingSelector sibSelector = (SiblingSelector) selector; return isValidSelector(selectorName, sibSelector .getSiblingSelector()) - & isValidSelector(selectorName, sibSelector.getSelector()); + && isValidSelector(selectorName, sibSelector.getSelector()); case Selector.SAC_NEGATIVE_SELECTOR: // this is a compound selector with one simple selector return validateSimpleSelector((NegativeSelector) selector); @@ -172,9 +170,6 @@ & isValidCondition(selectorName, condSelector * * @param selector * the object representation of the selector - * @param results - * the CleanResults object to add any error - * messages to * @return true if this selector name is valid; false otherwise */ private boolean validateSimpleSelector(SimpleSelector selector) { @@ -185,7 +180,7 @@ private boolean validateSimpleSelector(SimpleSelector selector) { String selectorLowerCase = selector.toString().toLowerCase(); return policy.getCommonRegularExpressions("cssElementSelector").matches(selectorLowerCase) - & !policy.getCommonRegularExpressions("cssElementExclusion").matches(selectorLowerCase); + && !policy.getCommonRegularExpressions("cssElementExclusion").matches(selectorLowerCase); } /** @@ -196,10 +191,9 @@ private boolean validateSimpleSelector(SimpleSelector selector) { * the name of the selector that contains this condition * @param condition * the object representation of this condition - * @param results - * the CleanResults object to add any error - * messages to * @return true if this condition is valid; false otherwise + * @throws ScanException When there is a problem encountered + * while scanning this condition */ public boolean isValidCondition(String selectorName, Condition condition) throws ScanException { @@ -210,7 +204,7 @@ public boolean isValidCondition(String selectorName, Condition condition) CombinatorCondition comboCondition = (CombinatorCondition) condition; return isValidCondition(selectorName, comboCondition .getFirstCondition()) - & isValidCondition(selectorName, comboCondition + && isValidCondition(selectorName, comboCondition .getSecondCondition()); case Condition.SAC_CLASS_CONDITION: // this is a basic class condition; compare condition against @@ -260,9 +254,6 @@ & isValidCondition(selectorName, comboCondition * the positive pattern of valid conditions * @param exclusionPattern * the negative pattern of excluded conditions - * @param results - * the CleanResults object to add any error - * messages to * @return true if this selector name is valid; false otherwise */ private boolean validateCondition(AttributeCondition condition, @@ -272,7 +263,7 @@ private boolean validateCondition(AttributeCondition condition, // NOTE: intentionally using non-short-circuited AND operator to // generate all relevant error messages String otherLower = condition.toString().toLowerCase(); - return pattern.matches(otherLower) & !exclusionPattern.matches(otherLower); + return pattern.matches(otherLower) && !exclusionPattern.matches(otherLower); } /** @@ -293,7 +284,7 @@ private boolean validateValue(Property property, String value) { value = value.toLowerCase(); // check if the value matches any of the allowed literal values - Iterator allowedValues = property.getAllowedValues().iterator(); + Iterator allowedValues = property.getAllowedValues().iterator(); while (allowedValues.hasNext() && !isValid) { String allowedValue = (String) allowedValues.next(); @@ -303,7 +294,7 @@ private boolean validateValue(Property property, String value) { } // check if the value matches any of the allowed regular expressions - Iterator allowedRegexps = property.getAllowedRegExp().iterator(); + Iterator allowedRegexps = property.getAllowedRegExp().iterator(); while (allowedRegexps.hasNext() && !isValid) { Pattern pattern = (Pattern) allowedRegexps.next(); @@ -313,7 +304,7 @@ private boolean validateValue(Property property, String value) { } // check if the value matches any of the allowed shorthands - Iterator shorthandRefs = property.getShorthandRefs().iterator(); + Iterator shorthandRefs = property.getShorthandRefs().iterator(); while (shorthandRefs.hasNext() && !isValid) { String shorthandRef = (String) shorthandRefs.next(); Property shorthand = policy.getPropertyByName(shorthandRef); diff --git a/src/main/java/org/owasp/validator/css/ExternalCssScanner.java b/src/main/java/org/owasp/validator/css/ExternalCssScanner.java index 372e920e..d20646c4 100644 --- a/src/main/java/org/owasp/validator/css/ExternalCssScanner.java +++ b/src/main/java/org/owasp/validator/css/ExternalCssScanner.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2011, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2019, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * @@ -59,20 +59,13 @@ public ExternalCssScanner(InternalPolicy policy, ResourceBundle messages) { * Parses through a LinkedList of imported stylesheet * URIs, this method parses through those stylesheets and validates them * - * @param stylesheets - * the LinkedList of stylesheet URIs to - * parse - * @param handler - * the CssHandler to use for parsing - * @param errorMessages - * the list of error messages to append to - * @param sizeLimit - * the limit on the total size in bites of any imported - * stylesheets - * @throws ScanException - * if an error occurs during scanning + * @param stylesheets the LinkedList of stylesheet URIs to parse + * @param handler the CssHandler to use for parsing + * @param errorMessages the list of error messages to append to + * @param sizeLimit the limit on the total size in bites of any imported stylesheets + * @throws ScanException if an error occurs during scanning */ - protected void parseImportedStylesheets(LinkedList stylesheets, CssHandler handler, + protected void parseImportedStylesheets(LinkedList stylesheets, CssHandler handler, ArrayList errorMessages, int sizeLimit) throws ScanException { int importedStylesheets = 0; @@ -172,8 +165,7 @@ protected void parseImportedStylesheets(LinkedList stylesheets, CssHandler handl } } - } - } } - + } + } } diff --git a/src/main/java/org/owasp/validator/html/AntiSamy.java b/src/main/java/org/owasp/validator/html/AntiSamy.java index a51c1695..b7ec9151 100644 --- a/src/main/java/org/owasp/validator/html/AntiSamy.java +++ b/src/main/java/org/owasp/validator/html/AntiSamy.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2011, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2019, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * @@ -28,6 +28,8 @@ import org.owasp.validator.html.scan.AntiSamySAXScanner; import java.io.File; +import java.io.Reader; +import java.io.Writer; /** * @@ -37,13 +39,12 @@ * accessibility of the policy file. * * @author Arshan Dabirsiaghi - * */ public class AntiSamy { - public static int DOM = 0; - public static int SAX = 1; + public static final int DOM = 0; + public static final int SAX = 1; private Policy policy = null; @@ -58,17 +59,13 @@ public AntiSamy(Policy policy) { * The meat and potatoes. The scan() family of methods are the * only methods the outside world should be calling to invoke AntiSamy. * - * @param taintedHTML - * Untrusted HTML which may contain malicious code. + * @param taintedHTML Untrusted HTML which may contain malicious code. * @return A CleanResults object which contains information * about the scan (including the results). * @throws ScanException When there is a problem encountered * while scanning the HTML. - * @throws PolicyException When there is a problem reading the - * policy file. - * - */ - + * @throws PolicyException When there is a problem reading the policy file. + */ public CleanResults scan(String taintedHTML) throws ScanException, PolicyException { if (policy == null) { @@ -78,6 +75,17 @@ public CleanResults scan(String taintedHTML) throws ScanException, PolicyExcepti return this.scan(taintedHTML, this.policy, SAX); } + /** + * This method sets scan() to use the specified scan type. + * + * @param taintedHTML Untrusted HTML which may contain malicious code. + * @param scanType The type of scan (DOM or SAX). + * @return A CleanResults object which contains information + * about the scan (including the results). + * @throws ScanException When there is a problem encountered + * while scanning the HTML. + * @throws PolicyException When there is a problem reading the policy file. + */ public CleanResults scan(String taintedHTML, int scanType) throws ScanException, PolicyException { if (policy == null) { @@ -88,11 +96,31 @@ public CleanResults scan(String taintedHTML, int scanType) throws ScanException, /** * This method wraps scan() using the Policy object passed in. + * + * @param taintedHTML Untrusted HTML which may contain malicious code. + * @param policy The custom policy to enforce. + * @return A CleanResults object which contains information + * about the scan (including the results). + * @throws ScanException When there is a problem encountered + * while scanning the HTML. + * @throws PolicyException When there is a problem reading the policy file. */ public CleanResults scan(String taintedHTML, Policy policy) throws ScanException, PolicyException { return new AntiSamyDOMScanner(policy).scan(taintedHTML); } + /** + * This method wraps scan() using the Policy object passed in and the specified scan type. + * + * @param taintedHTML Untrusted HTML which may contain malicious code. + * @param policy The custom policy to enforce. + * @param scanType The type of scan (DOM or SAX). + * @return A CleanResults object which contains information + * about the scan (including the results). + * @throws ScanException When there is a problem encountered + * while scanning the HTML. + * @throws PolicyException When there is a problem reading the policy file. + */ public CleanResults scan(String taintedHTML, Policy policy, int scanType) throws ScanException, PolicyException { if (scanType == DOM) { @@ -101,9 +129,34 @@ public CleanResults scan(String taintedHTML, Policy policy, int scanType) throws return new AntiSamySAXScanner(policy).scan(taintedHTML); } } + + /** + * Use this method if caller has Streams rather than Strings for I/O + * Useful for servlets where the response is very large and we don't validate, + * simply encode as bytes are consumed from the stream. + * @param reader Reader that produces the input, possibly a little at a time + * @param writer Writer that receives the cleaned output, possibly a little at a time + * @param policy Policy that directs the scan + * @return CleanResults where the cleanHtml is null. If caller wants the clean HTML, it + * must capture the writer's contents. When using Streams, caller generally + * doesn't want to create a single string containing clean HTML. + * @throws ScanException When there is a problem encountered + * while scanning the HTML. + */ + public CleanResults scan(Reader reader, Writer writer, Policy policy) throws ScanException { + return (new AntiSamySAXScanner(policy)).scan(reader, writer); + } /** - * This method wraps scan() using the Policy object passed in. + * This method wraps scan() using the Policy in the specified file. + * + * @param taintedHTML Untrusted HTML which may contain malicious code. + * @param filename The file name of the custom policy to enforce. + * @return A CleanResults object which contains information + * about the scan (including the results). + * @throws ScanException When there is a problem encountered + * while scanning the HTML. + * @throws PolicyException When there is a problem reading the policy file. */ public CleanResults scan(String taintedHTML, String filename) throws ScanException, PolicyException { @@ -113,8 +166,15 @@ public CleanResults scan(String taintedHTML, String filename) throws ScanExcepti } /** - * This method wraps scan() using the policy File object passed - * in. + * This method wraps scan() using the policy File object passed in. + * + * @param taintedHTML Untrusted HTML which may contain malicious code. + * @param policyFile The File object of the custom policy to enforce. + * @return A CleanResults object which contains information + * about the scan (including the results). + * @throws ScanException When there is a problem encountered + * while scanning the HTML. + * @throws PolicyException When there is a problem reading the policy file. */ public CleanResults scan(String taintedHTML, File policyFile) throws ScanException, PolicyException { diff --git a/src/main/java/org/owasp/validator/html/CleanResults.java b/src/main/java/org/owasp/validator/html/CleanResults.java index 55b78f4a..6d5b6570 100644 --- a/src/main/java/org/owasp/validator/html/CleanResults.java +++ b/src/main/java/org/owasp/validator/html/CleanResults.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2011, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2019, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * @@ -25,7 +25,6 @@ package org.owasp.validator.html; import java.util.ArrayList; -import java.util.Date; import java.util.List; import java.util.concurrent.Callable; @@ -46,6 +45,7 @@ public class CleanResults { private List errorMessages = new ArrayList(); private Callable cleanHTML; + private long startOfScan; private long elapsedScan; private DocumentFragment cleanXMLDocumentFragment; @@ -59,6 +59,7 @@ public CleanResults() { public CleanResults(long startOfScan, final String cleanHTML, DocumentFragment XMLDocumentFragment, List errorMessages) { + this.startOfScan = startOfScan; this.elapsedScan = System.currentTimeMillis() - startOfScan; this.cleanXMLDocumentFragment = XMLDocumentFragment; this.cleanHTML = new Callable() { @@ -116,9 +117,21 @@ public double getScanTime() { /** * Return the number of errors encountered during filtering. + * + * @return The number of errors encountered during filtering. */ public int getNumberOfErrors() { return errorMessages.size(); } + /** + * Get the time the scan started. + * + * @return time that scan started in ms + */ + public long getStartOfScan() + { + return startOfScan; + } + } diff --git a/src/main/java/org/owasp/validator/html/Policy.java b/src/main/java/org/owasp/validator/html/Policy.java index 85eef858..b08c8b40 100644 --- a/src/main/java/org/owasp/validator/html/Policy.java +++ b/src/main/java/org/owasp/validator/html/Policy.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2013, Arshan Dabirsiaghi, Jason Li, Kristian Rosenvold + * Copyright (c) 2007-2019, Arshan Dabirsiaghi, Jason Li, Kristian Rosenvold * * All rights reserved. * @@ -52,9 +52,7 @@ import static org.owasp.validator.html.util.XMLUtil.getAttributeValue; /** - * Policy.java - *

- * This file holds the model for our policy engine. + * Policy.java - This file holds the model for our policy engine. * * @author Arshan Dabirsiaghi */ @@ -102,7 +100,11 @@ public class Policy { private final TagMatcher requiresClosingTagsMatcher; /** - * The path to the base policy file, used to resolve relative paths when reading included files + * Get the Tag specified by the provided tag name. + * + * @param tagName + * The name of the Tag to return. + * @return The requested Tag, or null if it doesn't exist. */ public Tag getTagByLowercaseName(String tagName) { return tagRules.get(tagName); @@ -191,8 +193,8 @@ public static Policy getInstance(File file) throws PolicyException { /** * This retrieves a Policy based on the URL object passed in. - *

- * NOTE: This is the only factory method that will work with tags + *

+ * NOTE: This is the only factory method that will work with <include> tags * in AntiSamy policy files. * * @param url A URL object which contains the XML policy information. @@ -270,6 +272,7 @@ protected static Element getTopLevelElement(URL baseUrl) throws PolicyException return getTopLevelElement( source); } catch (SAXException e) { + // This can't actually happen. See JavaDoc for resolveEntity(String, URL) throw new PolicyException(e); } catch (IOException e) { throw new PolicyException(e); @@ -749,7 +752,7 @@ private static void parseCSSRules(Element root, Map cssRules1, /** - * A simple method for returning on of the entries by + * A simple method for returning on of the <global-attribute> entries by * name. * * @param name The name of the global-attribute we want to look up. @@ -761,7 +764,7 @@ public Attribute getGlobalAttributeByName(String name) { } /** - * A method for returning one of the dynamic entries by + * A method for returning one of the dynamic <global-attribute> entries by * name. * * @param name The name of the dynamic global-attribute we want to look up. @@ -800,6 +803,7 @@ public TagMatcher getRequiresClosingTags() { /** * Return a directive value based on a lookup name. * + * @param name The name of the directive we want to look up. * @return A String object containing the directive associated with the lookup name, or null if none is found. */ public String getDirective(String name) { @@ -808,7 +812,14 @@ public String getDirective(String name) { /** - * Resolves public & system ids to files stored within the JAR. + * Resolves public and system IDs to files stored within the JAR. + * + * @param systemId The name of the entity we want to look up. + * @param baseUrl The base location of the entity. + * @return A String object containing the directive associated with the lookup name, or null if none is found. + * @throws IOException if the specified URL can't be opened. + * @throws SAXException This exception can't actually be thrown, but left in the method signature for + * API compatibility reasons. */ public static InputSource resolveEntity(final String systemId, URL baseUrl) throws IOException, SAXException { InputSource source; diff --git a/src/main/java/org/owasp/validator/html/TagMatcher.java b/src/main/java/org/owasp/validator/html/TagMatcher.java index 7eae9f44..3f55cab2 100644 --- a/src/main/java/org/owasp/validator/html/TagMatcher.java +++ b/src/main/java/org/owasp/validator/html/TagMatcher.java @@ -23,9 +23,7 @@ */ package org.owasp.validator.html; -import java.util.ArrayList; import java.util.HashSet; -import java.util.List; import java.util.Set; /** diff --git a/src/main/java/org/owasp/validator/html/model/Attribute.java b/src/main/java/org/owasp/validator/html/model/Attribute.java index 03649fc5..57d25cd5 100644 --- a/src/main/java/org/owasp/validator/html/model/Attribute.java +++ b/src/main/java/org/owasp/validator/html/model/Attribute.java @@ -108,7 +108,7 @@ public String matcherRegEx(boolean hasNext){ /* * Go through and add static values to the regular expression. */ - Iterator allowedValues = this.allowedValues.iterator(); + Iterator allowedValues = this.allowedValues.iterator(); while (allowedValues.hasNext()) { String allowedValue = (String) allowedValues.next(); @@ -122,7 +122,7 @@ public String matcherRegEx(boolean hasNext){ /* * Add the regular expressions for this attribute value to the mother regular expression. */ - Iterator allowedRegExps = Arrays.asList((Pattern[]) this.allowedRegExps).iterator(); + Iterator allowedRegExps = Arrays.asList((Pattern[]) this.allowedRegExps).iterator(); while (allowedRegExps.hasNext()) { Pattern allowedRegExp = (Pattern) allowedRegExps.next(); regExp.append(allowedRegExp.pattern()); diff --git a/src/main/java/org/owasp/validator/html/model/Property.java b/src/main/java/org/owasp/validator/html/model/Property.java index 5959c9c4..46369863 100644 --- a/src/main/java/org/owasp/validator/html/model/Property.java +++ b/src/main/java/org/owasp/validator/html/model/Property.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2011, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2019, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * @@ -28,7 +28,6 @@ */ package org.owasp.validator.html.model; -import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.regex.Pattern; @@ -38,7 +37,6 @@ * or regular expressions) in order to be considered valid. * * @author Jason Li - * */ public class Property { private final String name; @@ -56,30 +54,32 @@ public Property(String name, List allowedRegexp3, List allowedV this.shorthandRefs = Collections.unmodifiableList(shortHandRefs); } - /** + /** * Return a List of allowed regular expressions - * @return A List of allowed regular expressions. + * @return The List of allowed regular expressions. */ - public List getAllowedRegExp() { + public List getAllowedRegExp() { return allowedRegExp; } - /** - * @return A List of allowed literal values. + /** + * Return a List of allowed literal values + * @return The List of allowed literal values. */ - public List getAllowedValues() { + public List getAllowedValues() { return allowedValues; } - /** - * @return A List of allowed shorthand references. + /** + * Return a List of allowed shorthand references + * @return The List of allowed shorthand references. */ - public List getShorthandRefs() { + public List getShorthandRefs() { return shorthandRefs; } - /** - * + /** + * Get the name of the property. * @return The name of the property. */ public String getName() { diff --git a/src/main/java/org/owasp/validator/html/model/Tag.java b/src/main/java/org/owasp/validator/html/model/Tag.java index b404b137..4c76accf 100644 --- a/src/main/java/org/owasp/validator/html/model/Tag.java +++ b/src/main/java/org/owasp/validator/html/model/Tag.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2011, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2019, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * @@ -29,7 +29,7 @@ /** * A model for HTML "tags" and the rules dictating their validation/filtration. Also contains information * about their allowed attributes. - *

+ *

* There is also some experimental (unused) code in here for generating a valid regular expression according to a policy * file on a per-tag basis. * @@ -38,8 +38,8 @@ public class Tag { /* - * These are the fields pulled from the policy XML. - */ + * These are the fields pulled from the policy XML. + */ private final Map allowedAttributes; private final String name; private final String action; @@ -77,9 +77,10 @@ public Tag mutateAction(String action) { /** * Returns a regular expression for validating individual tags. Not used by the AntiSamy scanner, but you might find some use for this. * - * @return A regular expression for the tag, i.e., "^$", or "" + * @return A regular expression for the tag, i.e., + * "^<b>$" + * or "<hr(\s)*(width='((\w){2,3}(\%)*)'>" */ - public String getRegularExpression() { /* diff --git a/src/main/java/org/owasp/validator/html/scan/ASHTMLSerializer.java b/src/main/java/org/owasp/validator/html/scan/ASHTMLSerializer.java index 97e1fd31..57ae10ef 100644 --- a/src/main/java/org/owasp/validator/html/scan/ASHTMLSerializer.java +++ b/src/main/java/org/owasp/validator/html/scan/ASHTMLSerializer.java @@ -4,7 +4,6 @@ import org.apache.xml.serialize.HTMLdtd; import org.apache.xml.serialize.OutputFormat; import org.owasp.validator.html.InternalPolicy; -import org.owasp.validator.html.Policy; import java.io.IOException; import java.io.Writer; diff --git a/src/main/java/org/owasp/validator/html/scan/ASXHTMLSerializer.java b/src/main/java/org/owasp/validator/html/scan/ASXHTMLSerializer.java index d3f49dd6..85cf18f4 100644 --- a/src/main/java/org/owasp/validator/html/scan/ASXHTMLSerializer.java +++ b/src/main/java/org/owasp/validator/html/scan/ASXHTMLSerializer.java @@ -3,7 +3,6 @@ import org.apache.xml.serialize.ElementState; import org.apache.xml.serialize.OutputFormat; import org.owasp.validator.html.InternalPolicy; -import org.owasp.validator.html.Policy; import org.owasp.validator.html.TagMatcher; import java.io.IOException; diff --git a/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java b/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java index 50fe0ee7..295fbb20 100644 --- a/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java +++ b/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2011, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2019, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * @@ -33,27 +33,27 @@ public abstract class AbstractAntiSamyScanner { - protected final InternalPolicy policy; - protected final List errorMessages = new ArrayList(); + protected final InternalPolicy policy; + protected final List errorMessages = new ArrayList(); - protected static final ResourceBundle messages = getResourceBundle(); - protected final Locale locale = Locale.getDefault(); + protected static final ResourceBundle messages = getResourceBundle(); + protected final Locale locale = Locale.getDefault(); - protected boolean isNofollowAnchors = false; - protected boolean isValidateParamAsEmbed = false; + protected boolean isNofollowAnchors = false; + protected boolean isValidateParamAsEmbed = false; - public abstract CleanResults scan(String html) throws ScanException; + public abstract CleanResults scan(String html) throws ScanException; - /** @noinspection UnusedDeclaration TODO: Investigate */ + /* UnusedDeclaration TODO: Investigate */ public abstract CleanResults getResults(); - public AbstractAntiSamyScanner(Policy policy) { - this.policy = (InternalPolicy) policy; - } + public AbstractAntiSamyScanner(Policy policy) { + this.policy = (InternalPolicy) policy; + } - public AbstractAntiSamyScanner() throws PolicyException { - policy = (InternalPolicy) Policy.getInstance(); - } + public AbstractAntiSamyScanner() throws PolicyException { + policy = (InternalPolicy) Policy.getInstance(); + } private static ResourceBundle getResourceBundle() { try { @@ -64,11 +64,11 @@ private static ResourceBundle getResourceBundle() { } protected void addError(String errorKey, Object[] objs) { - errorMessages.add(ErrorMessageUtil.getMessage(messages, errorKey, objs)); - } - - - protected OutputFormat getOutputFormat() { + errorMessages.add(ErrorMessageUtil.getMessage(messages, errorKey, objs)); + } + + + protected OutputFormat getOutputFormat() { OutputFormat format = new OutputFormat(); format.setOmitXMLDeclaration(policy.isOmitXmlDeclaration()); @@ -83,29 +83,28 @@ protected OutputFormat getOutputFormat() { } return format; - } - - /** @noinspection deprecation*/ + } + protected org.apache.xml.serialize.HTMLSerializer getHTMLSerializer(Writer w, OutputFormat format) { - if(policy.isUseXhtml()) { - return new ASXHTMLSerializer(w, format, policy); + if(policy.isUseXhtml()) { + return new ASXHTMLSerializer(w, format, policy); } return new ASHTMLSerializer(w, format, policy); - } + } - protected String trim(String original, String cleaned) { + protected String trim(String original, String cleaned) { if (cleaned.endsWith("\n")) { if (!original.endsWith("\n")) { if (cleaned.endsWith("\r\n")) { - cleaned = cleaned.substring(0, cleaned.length() - 2); + cleaned = cleaned.substring(0, cleaned.length() - 2); } else if (cleaned.endsWith("\n")) { - cleaned = cleaned.substring(0, cleaned.length() - 1); + cleaned = cleaned.substring(0, cleaned.length() - 1); } } } return cleaned; - } + } } diff --git a/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java b/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java index 945ef0a1..c8abee7b 100644 --- a/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java +++ b/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2011, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2019, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * @@ -57,7 +57,6 @@ * through a AntiSamy.scan() method. * * @author Arshan Dabirsiaghi - * */ public class AntiSamyDOMScanner extends AbstractAntiSamyScanner { @@ -90,7 +89,7 @@ public AntiSamyDOMScanner(Policy policy) { super(policy); } - /** @noinspection UnusedDeclaration Todo Investigate */ + /* UnusedDeclaration TODO Investigate */ public AntiSamyDOMScanner() throws PolicyException { super(); } @@ -98,14 +97,14 @@ public AntiSamyDOMScanner() throws PolicyException { /** * This is where the magic lives. * - * * @param html * A String whose contents we want to scan. * @return A CleanResults object with an * XMLDocumentFragment object and its String * representation, as well as some scan statistics. - * @throws ScanException - */ + * @throws ScanException When there is a problem encountered + * while scanning the HTML. + */ public CleanResults scan(String html) throws ScanException { if (html == null) { @@ -313,7 +312,7 @@ private void encodeTag(int currentStackDepth, Element ele, String tagName, NodeL addError(ErrorMessageUtil.ERROR_TAG_ENCODED, new Object[]{HTMLEntityEncoder.htmlEntityEncode(tagName)}); processChildren(eleChildNodes, currentStackDepth); - /* + /* * Transform the tag to text, HTML-encode it and promote the * children. The tag will be kept in the fragment as one or two text * Nodes located before and after the children; representing how the @@ -350,7 +349,7 @@ private void actionValidate(int currentStackDepth, Element ele, Node parentNode, } } - /* + /* * Check to see if it's a ", policy, AntiSamy.DOM).getCleanHTML().contains("z-index")); assertTrue(!as.scan("", policy, AntiSamy.SAX).getCleanHTML().contains("z-index")); - } /* - * Test a bunch of strings that have tweaked the XML parsing capabilities of - * NekoHTML. - */ + * Test a bunch of strings that have tweaked the XML parsing capabilities of + * NekoHTML. + */ @Test public void IllegalXML() throws PolicyException { for (String BASE64_BAD_XML_STRING : BASE64_BAD_XML_STRINGS) { try { - String testStr = new String(Base64.decodeBase64(BASE64_BAD_XML_STRING.getBytes())); as.scan(testStr, policy, AntiSamy.DOM); as.scan(testStr, policy, AntiSamy.SAX); @@ -384,10 +389,9 @@ public void IllegalXML() throws PolicyException { public void issue12() throws ScanException, PolicyException { /* - * issues 12 (and 36, which was similar). empty tags cause display - * problems/"formjacking" - */ - + * issues 12 (and 36, which was similar). empty tags cause display + * problems/"formjacking" + */ Pattern p = Pattern.compile(".*.*"); String s1 = as.scan("
hello world


", policy, AntiSamy.DOM).getCleanHTML(); @@ -465,15 +469,17 @@ public void issue30() throws ScanException, PolicyException { } @Test - public void isssue31() throws ScanException, PolicyException { + public void issue31() throws ScanException, PolicyException { - String test = "foo"; + String test = "foo"; Policy revised = policy.cloneWithDirective("onUnknownTag", "encode"); CleanResults cr = as.scan(test, revised, AntiSamy.DOM); String s = cr.getCleanHTML(); assertFalse(!s.contains("<g>")); + assertFalse(!s.contains("</g>")); s = as.scan(test, revised, AntiSamy.SAX).getCleanHTML(); assertFalse(!s.contains("<g>")); + assertFalse(!s.contains("</g>")); Tag tag = policy.getTagByLowercaseName("b").mutateAction("encode"); Policy policy1 = policy.mutateTag(tag); @@ -482,11 +488,13 @@ public void isssue31() throws ScanException, PolicyException { s = cr.getCleanHTML(); assertFalse(!s.contains("<b>")); + assertFalse(!s.contains("</b>")); cr = as.scan(test, policy1, AntiSamy.SAX); s = cr.getCleanHTML(); assertFalse(!s.contains("<b>")); + assertFalse(!s.contains("</b>")); } @Test @@ -585,27 +593,21 @@ public void issue38() throws ScanException, PolicyException { s = "foo@import 'x';bar"; as.scan(s, policy, AntiSamy.DOM); as.scan(s, policy, AntiSamy.SAX); - } @Test public void issue40() throws ScanException, PolicyException { - /* issue #40 - handling "; Policy revised = policy.cloneWithDirective(Policy.PRESERVE_SPACE, "true"); CleanResults cr = as.scan(s, revised, AntiSamy.DOM); - // System.out.println("here: " + cr.getCleanHTML()); assertTrue(cr.getCleanHTML().contains("print, projection, screen")); - // System.out.println(cr.getCleanHTML()); cr = as.scan(s, revised, AntiSamy.SAX); - // System.out.println(cr.getCleanHTML()); assertTrue(cr.getCleanHTML().contains("print, projection, screen")); - } @Test @@ -678,16 +680,13 @@ public void issue41() throws ScanException, PolicyException { assertTrue(!as.scan(s, revised2, AntiSamy.DOM).getCleanHTML().contains("" + ""; as.scan(s, policy, AntiSamy.DOM); assertEquals(as.scan(s, policy, AntiSamy.DOM).getNumberOfErrors(), 3); @@ -699,11 +698,10 @@ public void issue44() throws ScanException, PolicyException { @Test public void issue51() throws ScanException, PolicyException { - /* issue #51 - offsite urls with () are found to be invalid */ + /* issue #51 - offsite URLs with () are found to be invalid */ String s = "test"; CleanResults cr = as.scan(s, policy, AntiSamy.DOM); - // System.out.println(cr.getCleanHTML()); assertEquals(cr.getNumberOfErrors(), 0); cr = as.scan(s, policy, AntiSamy.SAX); @@ -711,7 +709,7 @@ public void issue51() throws ScanException, PolicyException { } @Test - public void isssue56() throws ScanException, PolicyException { + public void issue56() throws ScanException, PolicyException { /* issue #56 - unnecessary spaces */ String s = "Hello World!"; @@ -754,7 +752,6 @@ public void issue61() throws ScanException, PolicyException { @Test public void issue69() throws ScanException, PolicyException { - /* issue #69 - char attribute should allow single char or entity ref */ String s = "
test
"; @@ -795,17 +792,16 @@ public void CDATAByPass() throws ScanException, PolicyException { assertTrue(crSax.contains("<script") && !crDom.contains("") && !crDom.contains("")); assertTrue(!crSax.contains("") && !crSax.contains("")); - sb = new StringBuilder(); + StringBuilder sb = new StringBuilder(); sb.append("foobar"); sb.append(""); @@ -936,10 +929,6 @@ public void issue112() throws ScanException, PolicyException { @Test public void nestedCdataAttacks() throws ScanException, PolicyException { - /* - * #112 - empty tag becomes self closing - */ - /* * Testing for nested CDATA attacks against the SAX parser. @@ -983,10 +972,8 @@ public void issue101InternationalCharacterSupport() throws ScanException, Policy public void iframeAsReportedByOndrej() throws ScanException, PolicyException { String html = ""; - Policy revised; - Tag tag = new Tag("iframe", Collections.emptyMap(), Policy.ACTION_VALIDATE); - revised = policy.addTagRule(tag); + Policy revised = policy.addTagRule(tag); String crDom = as.scan(html, revised, AntiSamy.DOM).getCleanHTML(); String crSax = as.scan(html, revised, AntiSamy.SAX).getCleanHTML(); @@ -1000,46 +987,34 @@ public void iframeAsReportedByOndrej() throws ScanException, PolicyException { * have an action set to "validate" (may be implicit) in the policy file. */ @Test - public void nofollowAnchors() { - - try { - - // if we have activated nofollowAnchors - String val = policy.getDirective(Policy.ANCHORS_NOFOLLOW); - - Policy revisedPolici = policy.cloneWithDirective(Policy.ANCHORS_NOFOLLOW, "true"); - - // adds when not present - - assertTrue(as.scan("link", revisedPolici, AntiSamy.DOM).getCleanHTML().contains("link")); - assertTrue(as.scan("link", revisedPolici, AntiSamy.SAX).getCleanHTML().contains("link")); + public void nofollowAnchors() throws ScanException, PolicyException { - // adds properly even with bad attr - assertTrue(as.scan("link", revisedPolici, AntiSamy.DOM).getCleanHTML().contains("link")); - assertTrue(as.scan("link", revisedPolici, AntiSamy.SAX).getCleanHTML().contains("link")); + // if we have activated nofollowAnchors + Policy revisedPolicy = policy.cloneWithDirective(Policy.ANCHORS_NOFOLLOW, "true"); - // rel with bad value gets corrected - assertTrue(as.scan("link", revisedPolici, AntiSamy.DOM).getCleanHTML().contains("link")); - assertTrue(as.scan("link", revisedPolici, AntiSamy.SAX).getCleanHTML().contains("link")); + // adds when not present + assertTrue(as.scan("link", revisedPolicy, AntiSamy.DOM).getCleanHTML().contains("link")); + assertTrue(as.scan("link", revisedPolicy, AntiSamy.SAX).getCleanHTML().contains("link")); - // correct attribute doesnt get messed with - assertTrue(as.scan("link", policy, AntiSamy.DOM).getCleanHTML().contains("link")); - assertTrue(as.scan("link", policy, AntiSamy.SAX).getCleanHTML().contains("link")); + // adds properly even with bad attr + assertTrue(as.scan("link", revisedPolicy, AntiSamy.DOM).getCleanHTML().contains("link")); + assertTrue(as.scan("link", revisedPolicy, AntiSamy.SAX).getCleanHTML().contains("link")); - // if two correct attributes, only one remaining after scan - assertTrue(as.scan("link", policy, AntiSamy.DOM).getCleanHTML().contains("link")); - assertTrue(as.scan("link", policy, AntiSamy.SAX).getCleanHTML().contains("link")); + // rel with bad value gets corrected + assertTrue(as.scan("link", revisedPolicy, AntiSamy.DOM).getCleanHTML().contains("link")); + assertTrue(as.scan("link", revisedPolicy, AntiSamy.SAX).getCleanHTML().contains("link")); - // test if value is off - does it add? + // correct attribute doesn't get messed with + assertTrue(as.scan("link", policy, AntiSamy.DOM).getCleanHTML().contains("link")); + assertTrue(as.scan("link", policy, AntiSamy.SAX).getCleanHTML().contains("link")); - assertTrue(!as.scan("a href=\"blah\">link", policy, AntiSamy.DOM).getCleanHTML().contains("nofollow")); - assertTrue(!as.scan("a href=\"blah\">link", policy, AntiSamy.SAX).getCleanHTML().contains("nofollow")); + // if two correct attributes, only one remaining after scan + assertTrue(as.scan("link", policy, AntiSamy.DOM).getCleanHTML().contains("link")); + assertTrue(as.scan("link", policy, AntiSamy.SAX).getCleanHTML().contains("link")); - policy.cloneWithDirective(Policy.ANCHORS_NOFOLLOW, val); - - } catch (Exception e) { - fail("Caught exception in testNofollowAnchors(): " + e.getMessage()); - } + // test if value is off - does it add? + assertTrue(!as.scan("a href=\"blah\">link", policy, AntiSamy.DOM).getCleanHTML().contains("nofollow")); + assertTrue(!as.scan("a href=\"blah\">link", policy, AntiSamy.SAX).getCleanHTML().contains("nofollow")); } @Test @@ -1051,11 +1026,11 @@ public void validateParamAsEmbed() throws ScanException, PolicyException { String input = ""; String expectedOutput = ""; CleanResults cr = as.scan(input, revised, AntiSamy.DOM); - assertTrue(cr.getCleanHTML().contains(expectedOutput)); + assertThat(cr.getCleanHTML(), containsString(expectedOutput)); String saxExpectedOutput = ""; cr = as.scan(input, revised, AntiSamy.SAX); - assertTrue(cr.getCleanHTML().equals(saxExpectedOutput)); + assertThat(cr.getCleanHTML(), equalTo(saxExpectedOutput)); // now what if someone sticks malicious URL in the value of the // value attribute in the param tag? remove that param tag @@ -1063,10 +1038,10 @@ public void validateParamAsEmbed() throws ScanException, PolicyException { expectedOutput = ""; saxExpectedOutput = ""; cr = as.scan(input, revised, AntiSamy.DOM); - assertTrue(cr.getCleanHTML().contains(expectedOutput)); + assertThat(cr.getCleanHTML(), containsString(expectedOutput)); cr = as.scan(input, revised, AntiSamy.SAX); - assertTrue(cr.getCleanHTML().equals(saxExpectedOutput)); + assertThat(cr.getCleanHTML(), equalTo(saxExpectedOutput)); // now what if someone sticks malicious URL in the value of the src // attribute in the embed tag? remove that embed tag @@ -1075,9 +1050,9 @@ public void validateParamAsEmbed() throws ScanException, PolicyException { saxExpectedOutput = ""; cr = as.scan(input, revised, AntiSamy.DOM); - assertTrue(cr.getCleanHTML().contains(expectedOutput)); + assertThat(cr.getCleanHTML(), containsString(expectedOutput)); CleanResults scan = as.scan(input, revised, AntiSamy.SAX); - assertTrue(scan.getCleanHTML().equals(saxExpectedOutput)); + assertThat(scan.getCleanHTML(), equalTo(saxExpectedOutput)); } @Test @@ -1183,7 +1158,7 @@ public void testOnsiteRegex() throws ScanException, PolicyException { void assertIsGoodOnsiteURL(String url) throws ScanException, PolicyException { String html = as.scan("X", policy, AntiSamy.DOM).getCleanHTML(); - assertTrue(html.contains("href=\"")); + assertThat(html, containsString("href=\"")); } @Test @@ -1208,11 +1183,9 @@ public void issue75() throws ScanException, PolicyException { as.scan("", pol, AntiSamy.SAX); } - @Test public void issue144() throws ScanException, PolicyException { String pinata = "pi\u00f1ata"; - System.out.println(pinata); CleanResults results = as.scan(pinata, policy, AntiSamy.DOM); String cleanHTML = results.getCleanHTML(); assertEquals(pinata, cleanHTML); @@ -1249,11 +1222,8 @@ public void testXSSInAntiSamy151() throws ScanException, PolicyException { String test = "whatever"; CleanResults results_sax = as.scan(test, policy, AntiSamy.SAX); - - CleanResults results_dom = as.scan(test, policy, AntiSamy.DOM); - assertEquals( results_sax.getCleanHTML(), results_dom.getCleanHTML()); assertEquals("whatever", results_dom.getCleanHTML()); } @@ -1262,8 +1232,6 @@ public void testXSSInAntiSamy151() throws ScanException, PolicyException { public void testAnotherXSS() throws ScanException, PolicyException { String test = "foo"; CleanResults results_sax = as.scan(test, policy, AntiSamy.SAX); - - CleanResults results_dom = as.scan(test, policy, AntiSamy.DOM); assertEquals( results_sax.getCleanHTML(), results_dom.getCleanHTML()); @@ -1272,9 +1240,9 @@ public void testAnotherXSS() throws ScanException, PolicyException { @Test public void testIssue2() throws ScanException, PolicyException { - String test = ""; - assertFalse(as.scan(test, policy, AntiSamy.DOM).getCleanHTML().contains("alert")); - assertFalse(as.scan(test, policy, AntiSamy.SAX).getCleanHTML().contains("alert")); + String test = ""; + assertThat(as.scan(test, policy, AntiSamy.DOM).getCleanHTML(), not(containsString("alert"))); + assertThat(as.scan(test, policy, AntiSamy.SAX).getCleanHTML(), not(containsString("alert"))); } /* @@ -1282,13 +1250,150 @@ public void testIssue2() throws ScanException, PolicyException { */ @Test public void testUnknownTags() throws ScanException, PolicyException { - String test = "<%/onmouseover=prompt(1)>"; - CleanResults saxResults = as.scan(test, policy, AntiSamy.SAX); - CleanResults domResults = as.scan(test, policy, AntiSamy.DOM); - System.out.println("OnUnknown (SAX): " + saxResults.getCleanHTML()); - System.out.println("OnUnknown (DOM): " + domResults.getCleanHTML()); - assertFalse(saxResults.getCleanHTML().contains("<%/")); - assertFalse(domResults.getCleanHTML().contains("<%/")); + String test = "<%/onmouseover=prompt(1)>"; + CleanResults saxResults = as.scan(test, policy, AntiSamy.SAX); + CleanResults domResults = as.scan(test, policy, AntiSamy.DOM); + assertThat(saxResults.getCleanHTML(), not(containsString("<%/"))); + assertThat(domResults.getCleanHTML(), not(containsString("<%/"))); } + + @Test + public void testStreamScan() throws ScanException, PolicyException, InterruptedException, ExecutionException { + String testImgSrcURL = "whatever" + testImgSrcURL + "onmouseover=\"alert('xss')\">"); + Writer writer = new StringWriter(); + as.scan(reader, writer, policy); + String cleanHtml = writer.toString().trim(); + assertEquals("whatever" + testImgSrcURL + "/>", cleanHtml); + } + + @Test + public void testGithubIssue23() throws ScanException, PolicyException { + + // Antisamy Stripping nested lists and tables + String test23 = "
  • one
  • two
  • three
    • a
    • b
"; + // Issue claims you end up with this: + //
  • one
  • two
  • three
    • a
    • b
    + // Meaning the
  • a
  • b
  • elements were moved outside of the nested
      list they were in + + // The a.replaceAll("\\s","") is used to strip out all the whitespace in the CleanHTML so we can successfully find + // what we expect to find. + assertThat(as.scan(test23, policy, AntiSamy.SAX).getCleanHTML().replaceAll("\\s",""), containsString("
      • a
      • ")); + assertThat(as.scan(test23, policy, AntiSamy.DOM).getCleanHTML().replaceAll("\\s",""), containsString("
        • a
        • ")); + + // However, the test above can't replicate this misbehavior. + } + + // TODO: This issue is a valid enhancement request we plan to implement in the future. + // Commenting out the test case for now so test failures aren't included in a released version of AntiSamy. +/* @Test + public void testGithubIssue24() throws ScanException, PolicyException { + + // if we have onUnknownTag set to encode, it still strips out the @ and everything else after the it + // DOM Parser actually rips out the entire value even with onUnknownTag set + TestPolicy revisedPolicy = policy.cloneWithDirective("onUnknownTag", "encode"); + + String email = "name@mail.com"; + String test24 = "firstname,lastname<" + email + ">"; + assertThat(as.scan(test24, revisedPolicy, AntiSamy.SAX).getCleanHTML(), containsString(email)); + assertThat(as.scan(test24, revisedPolicy, AntiSamy.DOM).getCleanHTML(), containsString(email)); + } +*/ + @Test + public void testGithubIssue26() throws ScanException, PolicyException { + // Potential bypass (False positive) + String test26 = ""><img src=a onerror=alert(1)>"; + // Issue claims you end up with this: + // > + + assertThat(as.scan(test26, policy, AntiSamy.SAX).getCleanHTML(), not(containsString(""))); + assertThat(as.scan(test26, policy, AntiSamy.DOM).getCleanHTML(), not(containsString(""))); + + // But you actually end up with this: "><img src=a onerror=alert(1)> -- Which is as expected + } + + @Test + public void testGithubIssue27() throws ScanException, PolicyException { + // This test doesn't cause an ArrayIndexOutOfBoundsException, as reported in this issue even though it + // replicates the test as described. + String test27 = "my &test"; + assertThat(as.scan(test27, policy, AntiSamy.DOM).getCleanHTML(), containsString("test")); + assertThat(as.scan(test27, policy, AntiSamy.SAX).getCleanHTML(), containsString("test")); + } + +static final String test33 = "\n" + + "\n" + + " Test\n" + + "\n" + + "\n" + + "

          Tricky Encoding

          \n" + + "

          NOT Sanitized by AntiSamy

          \n" + + "
            \n" + + "
          1. X:x
          2. \n" + + "
          3. X:y
          4. \n" + + + "
          5. X:x
          6. \n" + + "
          7. X:y
          8. \n" + + + "
          9. X:x
          10. \n" + + "
          11. X:y
          12. \n" + + + "
          13. X:x
          14. \n" + + "
          15. X:y
          16. \n" + + "
          \n" + + "

          Tricky Encoding with Ampersand Encoding

          \n" + + "

          AntiSamy turns harmless payload into XSS by just decoding the encoded ampersands in the href attribute\n" + + "

            \n" + + "
          1. X&#x3A;x
          2. \n" + + "
          3. X&#x3A;x
          4. \n" + + + "
          5. X&#x3A;x
          6. \n" + + "
          7. X&#x3A;x
          8. \n" + + + "
          9. X&#x3A;x
          10. \n" + + "
          11. X&#x3A;x
          12. \n" + + "
          \n" + + "

          Original without ampersand encoding

          \n" + + "\n" + + ""; + + @Test + public void testGithubIssue33() throws ScanException, PolicyException { + + // Potential bypass + + // Issue claims you end up with this: + // javascript:x=alert and other similar problems (javascript:x=alert,x%281%29) but you don't. + // So issue is a false positive and has been closed. + //System.out.println(as.scan(test33, policy, AntiSamy.SAX).getCleanHTML()); + + assertThat(as.scan(test33, policy, AntiSamy.SAX).getCleanHTML(), not(containsString("javascript:x=alert,x%281%29"))); + assertThat(as.scan(test33, policy, AntiSamy.DOM).getCleanHTML(), not(containsString("javascript:x=alert,x%281%29"))); + } + + // TODO: This issue is a valid enhancement request. We are trying to decide whether to implement in the future. + // Commenting out the test case for now so test failures aren't included in a released version of AntiSamy. +/* + @Test + public void testGithubIssue34a() throws ScanException, PolicyException { + + // bypass stripNonValidXMLCharacters + // Issue indicates: "
          Hello\\uD83D\\uDC95
          " should be sanitized to: "
          Hello
          " + + String test34a = "
          Hello\uD83D\uDC95
          "; + assertEquals("
          Hello
          ", as.scan(test34a, policy, AntiSamy.SAX).getCleanHTML()); + assertEquals("
          Hello
          ", as.scan(test34a, policy, AntiSamy.DOM).getCleanHTML()); + } + + @Test + public void testGithubIssue34b() throws ScanException, PolicyException { + // bypass stripNonValidXMLCharacters + // Issue indicates: "
          Hello\\uD83D\\uDC95
          " should be sanitized to: "
          Hello
          " + + String test34b = "\uD888"; + assertEquals("", as.scan(test34b, policy, AntiSamy.DOM).getCleanHTML()); + assertEquals("", as.scan(test34b, policy, AntiSamy.SAX).getCleanHTML()); + } +*/ } diff --git a/src/test/java/org/owasp/validator/html/test/LiteralTest.java b/src/test/java/org/owasp/validator/html/test/LiteralTest.java index 504bb752..0115aaf1 100644 --- a/src/test/java/org/owasp/validator/html/test/LiteralTest.java +++ b/src/test/java/org/owasp/validator/html/test/LiteralTest.java @@ -25,19 +25,19 @@ protected void setUp() throws Exception { */ //get Policy instance from a URL. URL url = getClass().getResource("/antisamy.xml"); - System.out.println("Loading policy from URL: " + url); + //System.out.println("Loading policy from URL: " + url); policy = Policy.getInstance(url); } public void testSAXGoodResult() throws Exception { - System.out.println("Policy: " + policy); + //System.out.println("Policy: " + policy); // good String html = "
          html
          "; CleanResults cleanResults = new AntiSamy(policy).scan(html, AntiSamy.SAX); - System.out.println("SAX cleanResults: " + cleanResults.getCleanHTML()); - System.out.println("SAX cleanResults error messages: " + cleanResults.getErrorMessages().size()); + //System.out.println("SAX cleanResults: " + cleanResults.getCleanHTML()); + //System.out.println("SAX cleanResults error messages: " + cleanResults.getErrorMessages().size()); for (String msg : cleanResults.getErrorMessages()) { System.out.println("error msg: " + msg); @@ -47,29 +47,29 @@ public void testSAXGoodResult() throws Exception { } public void testSAXBadResult() throws Exception { - System.out.println("Policy: " + policy); + //System.out.println("Policy: " + policy); // AntiSamy should complain about the attribute value "foo" ... but it is not String badHtml = "
          badhtml
          "; CleanResults cleanResults2 = new AntiSamy(policy).scan(badHtml, AntiSamy.SAX); - System.out.println("SAX cleanResults2: " + cleanResults2.getCleanHTML()); - System.out.println("SAX cleanResults2 error messages: " + cleanResults2.getErrorMessages().size()); + //System.out.println("SAX cleanResults2: " + cleanResults2.getCleanHTML()); + //System.out.println("SAX cleanResults2 error messages: " + cleanResults2.getErrorMessages().size()); for (String msg : cleanResults2.getErrorMessages()) { - System.out.println("error msg: " + msg); + // System.out.println("error msg: " + msg); } assertTrue(cleanResults2.getErrorMessages().size() > 0); } public void testDOMGoodResult() throws Exception { - System.out.println("Policy: " + policy); + //System.out.println("Policy: " + policy); // good String html = "
          html
          "; CleanResults cleanResults = new AntiSamy(policy).scan(html, AntiSamy.DOM); - System.out.println("DOM cleanResults error messages: " + cleanResults.getErrorMessages().size()); + //System.out.println("DOM cleanResults error messages: " + cleanResults.getErrorMessages().size()); for (String msg : cleanResults.getErrorMessages()) { System.out.println("error msg: " + msg); } @@ -78,16 +78,16 @@ public void testDOMGoodResult() throws Exception { } public void testDOMBadResult() throws Exception { - System.out.println("Policy: " + policy); + //System.out.println("Policy: " + policy); // AntiSamy should complain about the attribute value "foo" ... but it is not String badHtml = "
          badhtml
          "; CleanResults cleanResults2 = new AntiSamy(policy).scan(badHtml, AntiSamy.DOM); - System.out.println("DOM cleanResults2 error messages: " + cleanResults2.getErrorMessages().size()); + //System.out.println("DOM cleanResults2 error messages: " + cleanResults2.getErrorMessages().size()); for (String msg : cleanResults2.getErrorMessages()) { - System.out.println("error msg: " + msg); + // System.out.println("error msg: " + msg); } assertTrue(cleanResults2.getErrorMessages().size() > 0); }