CleanResults
object to add any error
- * messages to
* @return true if this selector name is valid; false otherwise
+ * @throws ScanException When there is a problem encountered
+ * while scanning this selector
*/
public boolean isValidSelector(String selectorName, Selector selector)
throws ScanException {
@@ -139,21 +137,21 @@ public boolean isValidSelector(String selectorName, Selector selector)
DescendantSelector descSelector = (DescendantSelector) selector;
return isValidSelector(selectorName, descSelector
.getSimpleSelector())
- & isValidSelector(selectorName, descSelector
+ && isValidSelector(selectorName, descSelector
.getAncestorSelector());
case Selector.SAC_CONDITIONAL_SELECTOR:
// this is a compound selector - decompose into simple selectors
ConditionalSelector condSelector = (ConditionalSelector) selector;
return isValidSelector(selectorName, condSelector
.getSimpleSelector())
- & isValidCondition(selectorName, condSelector
+ && isValidCondition(selectorName, condSelector
.getCondition());
case Selector.SAC_DIRECT_ADJACENT_SELECTOR:
// this is a compound selector - decompose into simple selectors
SiblingSelector sibSelector = (SiblingSelector) selector;
return isValidSelector(selectorName, sibSelector
.getSiblingSelector())
- & isValidSelector(selectorName, sibSelector.getSelector());
+ && isValidSelector(selectorName, sibSelector.getSelector());
case Selector.SAC_NEGATIVE_SELECTOR:
// this is a compound selector with one simple selector
return validateSimpleSelector((NegativeSelector) selector);
@@ -172,9 +170,6 @@ & isValidCondition(selectorName, condSelector
*
* @param selector
* the object representation of the selector
- * @param results
- * the CleanResults
object to add any error
- * messages to
* @return true if this selector name is valid; false otherwise
*/
private boolean validateSimpleSelector(SimpleSelector selector) {
@@ -185,7 +180,7 @@ private boolean validateSimpleSelector(SimpleSelector selector) {
String selectorLowerCase = selector.toString().toLowerCase();
return policy.getCommonRegularExpressions("cssElementSelector").matches(selectorLowerCase)
- & !policy.getCommonRegularExpressions("cssElementExclusion").matches(selectorLowerCase);
+ && !policy.getCommonRegularExpressions("cssElementExclusion").matches(selectorLowerCase);
}
/**
@@ -196,10 +191,9 @@ private boolean validateSimpleSelector(SimpleSelector selector) {
* the name of the selector that contains this condition
* @param condition
* the object representation of this condition
- * @param results
- * the CleanResults
object to add any error
- * messages to
* @return true if this condition is valid; false otherwise
+ * @throws ScanException When there is a problem encountered
+ * while scanning this condition
*/
public boolean isValidCondition(String selectorName, Condition condition)
throws ScanException {
@@ -210,7 +204,7 @@ public boolean isValidCondition(String selectorName, Condition condition)
CombinatorCondition comboCondition = (CombinatorCondition) condition;
return isValidCondition(selectorName, comboCondition
.getFirstCondition())
- & isValidCondition(selectorName, comboCondition
+ && isValidCondition(selectorName, comboCondition
.getSecondCondition());
case Condition.SAC_CLASS_CONDITION:
// this is a basic class condition; compare condition against
@@ -260,9 +254,6 @@ & isValidCondition(selectorName, comboCondition
* the positive pattern of valid conditions
* @param exclusionPattern
* the negative pattern of excluded conditions
- * @param results
- * the CleanResults
object to add any error
- * messages to
* @return true if this selector name is valid; false otherwise
*/
private boolean validateCondition(AttributeCondition condition,
@@ -272,7 +263,7 @@ private boolean validateCondition(AttributeCondition condition,
// NOTE: intentionally using non-short-circuited AND operator to
// generate all relevant error messages
String otherLower = condition.toString().toLowerCase();
- return pattern.matches(otherLower) & !exclusionPattern.matches(otherLower);
+ return pattern.matches(otherLower) && !exclusionPattern.matches(otherLower);
}
/**
@@ -293,7 +284,7 @@ private boolean validateValue(Property property, String value) {
value = value.toLowerCase();
// check if the value matches any of the allowed literal values
- Iterator allowedValues = property.getAllowedValues().iterator();
+ Iterator> allowedValues = property.getAllowedValues().iterator();
while (allowedValues.hasNext() && !isValid) {
String allowedValue = (String) allowedValues.next();
@@ -303,7 +294,7 @@ private boolean validateValue(Property property, String value) {
}
// check if the value matches any of the allowed regular expressions
- Iterator allowedRegexps = property.getAllowedRegExp().iterator();
+ Iterator> allowedRegexps = property.getAllowedRegExp().iterator();
while (allowedRegexps.hasNext() && !isValid) {
Pattern pattern = (Pattern) allowedRegexps.next();
@@ -313,7 +304,7 @@ private boolean validateValue(Property property, String value) {
}
// check if the value matches any of the allowed shorthands
- Iterator shorthandRefs = property.getShorthandRefs().iterator();
+ Iterator> shorthandRefs = property.getShorthandRefs().iterator();
while (shorthandRefs.hasNext() && !isValid) {
String shorthandRef = (String) shorthandRefs.next();
Property shorthand = policy.getPropertyByName(shorthandRef);
diff --git a/src/main/java/org/owasp/validator/css/ExternalCssScanner.java b/src/main/java/org/owasp/validator/css/ExternalCssScanner.java
index 372e920e..d20646c4 100644
--- a/src/main/java/org/owasp/validator/css/ExternalCssScanner.java
+++ b/src/main/java/org/owasp/validator/css/ExternalCssScanner.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2011, Arshan Dabirsiaghi, Jason Li
+ * Copyright (c) 2007-2019, Arshan Dabirsiaghi, Jason Li
*
* All rights reserved.
*
@@ -59,20 +59,13 @@ public ExternalCssScanner(InternalPolicy policy, ResourceBundle messages) {
* Parses through a LinkedList
of imported stylesheet
* URIs, this method parses through those stylesheets and validates them
*
- * @param stylesheets
- * the LinkedList
of stylesheet URIs to
- * parse
- * @param handler
- * the CssHandler
to use for parsing
- * @param errorMessages
- * the list of error messages to append to
- * @param sizeLimit
- * the limit on the total size in bites of any imported
- * stylesheets
- * @throws ScanException
- * if an error occurs during scanning
+ * @param stylesheets the LinkedList
of stylesheet URIs to parse
+ * @param handler the CssHandler
to use for parsing
+ * @param errorMessages the list of error messages to append to
+ * @param sizeLimit the limit on the total size in bytes of any imported stylesheets
+ * @throws ScanException if an error occurs during scanning
*/
- protected void parseImportedStylesheets(LinkedList stylesheets, CssHandler handler,
+ protected void parseImportedStylesheets(LinkedList> stylesheets, CssHandler handler,
ArrayListscan()
family of methods are the
* only methods the outside world should be calling to invoke AntiSamy.
*
- * @param taintedHTML
- * Untrusted HTML which may contain malicious code.
+ * @param taintedHTML Untrusted HTML which may contain malicious code.
* @return A CleanResults
object which contains information
* about the scan (including the results).
* @throws ScanException When there is a problem encountered
* while scanning the HTML.
- * @throws PolicyException When there is a problem reading the
- * policy file.
- *
- */
-
+ * @throws PolicyException When there is a problem reading the policy file.
+ */
public CleanResults scan(String taintedHTML) throws ScanException, PolicyException {
if (policy == null) {
@@ -78,6 +75,17 @@ public CleanResults scan(String taintedHTML) throws ScanException, PolicyExcepti
return this.scan(taintedHTML, this.policy, SAX);
}
+ /**
+ * This method sets scan()
to use the specified scan type.
+ *
+ * @param taintedHTML Untrusted HTML which may contain malicious code.
+ * @param scanType The type of scan (DOM or SAX).
+ * @return A CleanResults
object which contains information
+ * about the scan (including the results).
+ * @throws ScanException When there is a problem encountered
+ * while scanning the HTML.
+ * @throws PolicyException When there is a problem reading the policy file.
+ */
public CleanResults scan(String taintedHTML, int scanType) throws ScanException, PolicyException {
if (policy == null) {
@@ -88,11 +96,31 @@ public CleanResults scan(String taintedHTML, int scanType) throws ScanException,
/**
* This method wraps scan()
using the Policy object passed in.
+ *
+ * @param taintedHTML Untrusted HTML which may contain malicious code.
+ * @param policy The custom policy to enforce.
+ * @return A CleanResults
object which contains information
+ * about the scan (including the results).
+ * @throws ScanException When there is a problem encountered
+ * while scanning the HTML.
+ * @throws PolicyException When there is a problem reading the policy file.
*/
public CleanResults scan(String taintedHTML, Policy policy) throws ScanException, PolicyException {
return new AntiSamyDOMScanner(policy).scan(taintedHTML);
}
+ /**
+ * This method wraps scan()
using the Policy object passed in and the specified scan type.
+ *
+ * @param taintedHTML Untrusted HTML which may contain malicious code.
+ * @param policy The custom policy to enforce.
+ * @param scanType The type of scan (DOM or SAX).
+ * @return A CleanResults
object which contains information
+ * about the scan (including the results).
+ * @throws ScanException When there is a problem encountered
+ * while scanning the HTML.
+ * @throws PolicyException When there is a problem reading the policy file.
+ */
public CleanResults scan(String taintedHTML, Policy policy, int scanType) throws ScanException, PolicyException {
if (scanType == DOM) {
@@ -101,9 +129,34 @@ public CleanResults scan(String taintedHTML, Policy policy, int scanType) throws
return new AntiSamySAXScanner(policy).scan(taintedHTML);
}
}
+
+ /**
+ * Use this method if caller has Streams rather than Strings for I/O.
+ * Useful for servlets where the response is very large and we don't validate,
+ * simply encode as bytes are consumed from the stream.
+ * @param reader Reader that produces the input, possibly a little at a time
+ * @param writer Writer that receives the cleaned output, possibly a little at a time
+ * @param policy Policy that directs the scan
+ * @return CleanResults where the cleanHtml is null. If caller wants the clean HTML, it
+ * must capture the writer's contents. When using Streams, caller generally
+ * doesn't want to create a single string containing clean HTML.
+ * @throws ScanException When there is a problem encountered
+ * while scanning the HTML.
+ */
+ public CleanResults scan(Reader reader, Writer writer, Policy policy) throws ScanException {
+ return (new AntiSamySAXScanner(policy)).scan(reader, writer);
+ }
/**
- * This method wraps scan()
using the Policy object passed in.
+ * This method wraps scan()
using the Policy in the specified file.
+ *
+ * @param taintedHTML Untrusted HTML which may contain malicious code.
+ * @param filename The file name of the custom policy to enforce.
+ * @return A CleanResults
object which contains information
+ * about the scan (including the results).
+ * @throws ScanException When there is a problem encountered
+ * while scanning the HTML.
+ * @throws PolicyException When there is a problem reading the policy file.
*/
public CleanResults scan(String taintedHTML, String filename) throws ScanException, PolicyException {
@@ -113,8 +166,15 @@ public CleanResults scan(String taintedHTML, String filename) throws ScanExcepti
}
/**
- * This method wraps scan()
using the policy File object passed
- * in.
+ * This method wraps scan()
using the policy File object passed in.
+ *
+ * @param taintedHTML Untrusted HTML which may contain malicious code.
+ * @param policyFile The File object of the custom policy to enforce.
+ * @return A CleanResults
object which contains information
+ * about the scan (including the results).
+ * @throws ScanException When there is a problem encountered
+ * while scanning the HTML.
+ * @throws PolicyException When there is a problem reading the policy file.
*/
public CleanResults scan(String taintedHTML, File policyFile) throws ScanException, PolicyException {
diff --git a/src/main/java/org/owasp/validator/html/CleanResults.java b/src/main/java/org/owasp/validator/html/CleanResults.java
index 55b78f4a..6d5b6570 100644
--- a/src/main/java/org/owasp/validator/html/CleanResults.java
+++ b/src/main/java/org/owasp/validator/html/CleanResults.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2011, Arshan Dabirsiaghi, Jason Li
+ * Copyright (c) 2007-2019, Arshan Dabirsiaghi, Jason Li
*
* All rights reserved.
*
@@ -25,7 +25,6 @@
package org.owasp.validator.html;
import java.util.ArrayList;
-import java.util.Date;
import java.util.List;
import java.util.concurrent.Callable;
@@ -46,6 +45,7 @@ public class CleanResults {
private ListList
of allowed regular expressions
- * @return A List
of allowed regular expressions.
+ * @return The List of allowed regular expressions.
*/
- public List getAllowedRegExp() {
+ public ListList
of allowed literal values.
+ /**
+ * Return a List
of allowed literal values
+ * @return The List of allowed literal values.
*/
- public List getAllowedValues() {
+ public ListList
of allowed shorthand references.
+ /**
+ * Return a List
of allowed shorthand references
+ * @return The List of allowed shorthand references.
*/
- public List getShorthandRefs() {
+ public List"^<b>$"
+ * or "<hr(\s)*(width='((\w){2,3}(\%)*)'>"
*/
-
public String getRegularExpression() {
/*
diff --git a/src/main/java/org/owasp/validator/html/scan/ASHTMLSerializer.java b/src/main/java/org/owasp/validator/html/scan/ASHTMLSerializer.java
index 97e1fd31..57ae10ef 100644
--- a/src/main/java/org/owasp/validator/html/scan/ASHTMLSerializer.java
+++ b/src/main/java/org/owasp/validator/html/scan/ASHTMLSerializer.java
@@ -4,7 +4,6 @@
import org.apache.xml.serialize.HTMLdtd;
import org.apache.xml.serialize.OutputFormat;
import org.owasp.validator.html.InternalPolicy;
-import org.owasp.validator.html.Policy;
import java.io.IOException;
import java.io.Writer;
diff --git a/src/main/java/org/owasp/validator/html/scan/ASXHTMLSerializer.java b/src/main/java/org/owasp/validator/html/scan/ASXHTMLSerializer.java
index d3f49dd6..85cf18f4 100644
--- a/src/main/java/org/owasp/validator/html/scan/ASXHTMLSerializer.java
+++ b/src/main/java/org/owasp/validator/html/scan/ASXHTMLSerializer.java
@@ -3,7 +3,6 @@
import org.apache.xml.serialize.ElementState;
import org.apache.xml.serialize.OutputFormat;
import org.owasp.validator.html.InternalPolicy;
-import org.owasp.validator.html.Policy;
import org.owasp.validator.html.TagMatcher;
import java.io.IOException;
diff --git a/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java b/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java
index 50fe0ee7..295fbb20 100644
--- a/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java
+++ b/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2011, Arshan Dabirsiaghi, Jason Li
+ * Copyright (c) 2007-2019, Arshan Dabirsiaghi, Jason Li
*
* All rights reserved.
*
@@ -33,27 +33,27 @@
public abstract class AbstractAntiSamyScanner {
- protected final InternalPolicy policy;
- protected final ListAntiSamy.scan()
method.
*
* @author Arshan Dabirsiaghi
- *
*/
public class AntiSamyDOMScanner extends AbstractAntiSamyScanner {
@@ -90,7 +89,7 @@ public AntiSamyDOMScanner(Policy policy) {
super(policy);
}
- /** @noinspection UnusedDeclaration Todo Investigate */
+ /* UnusedDeclaration TODO Investigate */
public AntiSamyDOMScanner() throws PolicyException {
super();
}
@@ -98,14 +97,14 @@ public AntiSamyDOMScanner() throws PolicyException {
/**
* This is where the magic lives.
*
- *
* @param html
* A String whose contents we want to scan.
* @return A CleanResults
object with an
* XMLDocumentFragment
object and its String
* representation, as well as some scan statistics.
- * @throws ScanException
- */
+ * @throws ScanException When there is a problem encountered
+ * while scanning the HTML.
+ */
public CleanResults scan(String html) throws ScanException {
if (html == null) {
@@ -313,7 +312,7 @@ private void encodeTag(int currentStackDepth, Element ele, String tagName, NodeL
addError(ErrorMessageUtil.ERROR_TAG_ENCODED, new Object[]{HTMLEntityEncoder.htmlEntityEncode(tagName)});
processChildren(eleChildNodes, currentStackDepth);
- /*
+ /*
* Transform the tag to text, HTML-encode it and promote the
* children. The tag will be kept in the fragment as one or two text
* Nodes located before and after the children; representing how the
@@ -350,7 +349,7 @@ private void actionValidate(int currentStackDepth, Element ele, Node parentNode,
}
}
- /*
+ /*
* Check to see if it's a ", policy, AntiSamy.DOM).getCleanHTML().contains("z-index"));
assertTrue(!as.scan("", policy, AntiSamy.SAX).getCleanHTML().contains("z-index"));
-
}
/*
- * Test a bunch of strings that have tweaked the XML parsing capabilities of
- * NekoHTML.
- */
+ * Test a bunch of strings that have tweaked the XML parsing capabilities of
+ * NekoHTML.
+ */
@Test
public void IllegalXML() throws PolicyException {
for (String BASE64_BAD_XML_STRING : BASE64_BAD_XML_STRINGS) {
try {
-
String testStr = new String(Base64.decodeBase64(BASE64_BAD_XML_STRING.getBytes()));
as.scan(testStr, policy, AntiSamy.DOM);
as.scan(testStr, policy, AntiSamy.SAX);
@@ -384,10 +389,9 @@ public void IllegalXML() throws PolicyException {
public void issue12() throws ScanException, PolicyException {
/*
- * issues 12 (and 36, which was similar). empty tags cause display
- * problems/"formjacking"
- */
-
+ * issues 12 (and 36, which was similar). empty tags cause display
+ * problems/"formjacking"
+ */
Pattern p = Pattern.compile(".*.*");
String s1 = as.scan("test |
AntiSamy turns harmless payload into XSS by just decoding the encoded ampersands in the href attribute\n" + + "
Original without ampersand encoding
\n" + + "\n" + + ""; + + @Test + public void testGithubIssue33() throws ScanException, PolicyException { + + // Potential bypass + + // Issue claims you end up with this: + // javascript:x=alert and other similar problems (javascript:x=alert,x%281%29) but you don't. + // So issue is a false positive and has been closed. + //System.out.println(as.scan(test33, policy, AntiSamy.SAX).getCleanHTML()); + + assertThat(as.scan(test33, policy, AntiSamy.SAX).getCleanHTML(), not(containsString("javascript:x=alert,x%281%29"))); + assertThat(as.scan(test33, policy, AntiSamy.DOM).getCleanHTML(), not(containsString("javascript:x=alert,x%281%29"))); + } + + // TODO: This issue is a valid enhancement request. We are trying to decide whether to implement in the future. + // Commenting out the test case for now so test failures aren't included in a released version of AntiSamy. +/* + @Test + public void testGithubIssue34a() throws ScanException, PolicyException { + + // bypass stripNonValidXMLCharacters + // Issue indicates: "