Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement issue #276 exclude certain URLs from checking, also impleme… #368

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docToolchainConfig.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,8 @@ exportEA.with {
// Configuration of the htmlSanityCheck step: where the HTML to check lives
// and where the results are written.
htmlSanityCheck.with {
sourceDir = 'microsite/output'
resultsFolder = 'html-sanity-check'
// Optional: complete URLs that the broken-http-links check should skip.
// An href is skipped only if it is contained in this collection as-is.
// NOTE(review): presumably forwarded to HtmlSanityCheck's urlsToExclude option -- confirm.
//urlsToExclude = ['http://example.com/excluded', 'http://example.com/excluded2']
// Optional: host names whose links should be skipped entirely.
// The host part of each URL is looked up in this collection, so list the
// host exactly as it appears in the links (no scheme, no path).
//hostsToExclude = ['example2.com', 'example3', 'example4']
}
//end::htmlSanityCheckConfig[]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ public class Configuration {
@Getter(AccessLevel.NONE)
@Builder.Default
Boolean ignoreIPAddresses = false;
Set<String> urlsToExclude;
Set<String> hostsToExclude;

/*
* Explanation for configuring http status codes:
* The standard http status codes are defined in class @link NetUtil and can
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ class BrokenHttpLinksChecker extends Checker {
// need that to calculate "nrOfOccurrences"
// the pure http/https-hrefs a set, duplicates are removed here
private Set<String> hrefSet;
private Set<String> urlsToExclude;
private Set<String> hostsToExclude;


BrokenHttpLinksChecker(Configuration pConfig) {
Expand All @@ -45,6 +47,8 @@ class BrokenHttpLinksChecker extends Checker {
errorCodes = getMyConfig().getHttpErrorCodes();
warningCodes = getMyConfig().getHttpWarningCodes();
successCodes = getMyConfig().getHttpSuccessCodes();
urlsToExclude = getMyConfig().getUrlsToExclude();
hostsToExclude = getMyConfig().getHostsToExclude();
}

@Override
Expand Down Expand Up @@ -101,6 +105,25 @@ private void checkAllHttpLinks() {


protected void doubleCheckSingleHttpLink(String href) {
if (urlsToExclude != null && urlsToExclude.contains(href)) {
// Skip checking this URL
return;
}

// Check if the host of the URL is in the hostsToExclude list
try {
URL url = new URL(href);
String host = url.getHost();
if (hostsToExclude != null && hostsToExclude.contains(host)) {
// Skip checking this URL
return;
}
} catch (MalformedURLException e) {
// Handle the exception if the URL is malformed
Finding malformedURLFinding = new Finding("malformed URL exception with href=" + href);
getCheckingResults().addFinding(malformedURLFinding);
return;
}
// bookkeeping:
getCheckingResults().incNrOfChecks();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,12 @@ class HtmlSanityCheckTask extends DefaultTask {
@Optional
@Input
Set<Integer> httpSuccessCodes
@Optional
@Input
Set<String> urlsToExclude
@Optional
@Input
Set<String> hostsToExclude

@Input
List<Class<? extends Checker>> checkerClasses = AllCheckers.CHECKER_CLASSES
Expand Down Expand Up @@ -187,6 +193,8 @@ See ${checkingResultsDir} for a detailed report."""
.ignoreIPAddresses(ignoreIPAddresses)

.checksToExecute(checkerClasses)
.urlsToExclude(urlsToExclude)
.hostsToExclude(hostsToExclude)
.build()

// in case we have configured specific interpretations of http status codes
Expand All @@ -212,6 +220,8 @@ See ${checkingResultsDir} for a detailed report."""
logger.info "Results dir : $checkingResultsDir"
logger.info "JUnit dir : $junitResultsDir"
logger.info "Fail on errors : $failOnErrors"
logger.info "Urls to Exclude : $urlsToExclude"
logger.info "Hosts to Exclude: $hostsToExclude"
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,22 @@ import spock.lang.Specification
class HtmlSanityCheckBaseSpec extends Specification {
final static VALID_HTML = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"><html><head></head><body></body><html>"""
final static INVALID_HTML = """<body><span id="id"/><span id="id"/></body> """
// HTML fixture with two http links on the same host (example.com):
// one to the path "/excluded" and one to the path "/included".
final static VALID_HTML_WITH_EXCLUDED_URL = """
<html>
<body>
<a href="http://example.com/excluded">Excluded URL</a>
<a href="http://example.com/included">Included URL</a>
</body>
</html>
"""
// HTML fixture with two http links that share the path "/page" but point to
// different hosts: excluded.com and included.com.
final static VALID_HTML_WITH_EXCLUDED_HOST = """
<html>
<body>
<a href="http://excluded.com/page">Excluded Host</a>
<a href="http://included.com/page">Included Host</a>
</body>
</html>
"""
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As far as I can see, these HTML snippets are only used in HtmlSanityCheckTaskFunctionalSpec (unlike the other final statics, which are used in both derived classes). If the code is only used in one derived class, please move it there.


@Rule
TemporaryFolder testProjectDir = new TemporaryFolder()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,44 @@ class HtmlSanityCheckTaskFunctionalSpec extends HtmlSanityCheckBaseSpec {
gradleVersion << GRADLE_VERSIONS
}

/**
 * Functional test for the urlsToExclude option: runs the htmlSanityCheck task
 * against an HTML file that links to an excluded URL and expects the task to
 * succeed without the excluded URL showing up in the build output.
 * Executed once for every entry of GRADLE_VERSIONS.
 */
@Unroll
def "can exclude specific URLs with urlsToExclude and Gradle version #gradleVersion"() {
// HTML with one excluded and one included example.com link; the build file
// excludes exactly the "/excluded" URL.
// NOTE(review): presumably createBuildFile places the snippet inside the
// htmlSanityCheck configuration of the generated build -- confirm.
given:
htmlFile << VALID_HTML_WITH_EXCLUDED_URL
createBuildFile("""
urlsToExclude = ['http://example.com/excluded']
""")

// build() is used (not buildAndFail()), so the build is expected to succeed
when:
def result = runnerForHtmlSanityCheckTask(gradleVersion as String).build()

// the task succeeded and the excluded URL is not mentioned in the output
then:
result.task(":htmlSanityCheck").outcome == SUCCESS
!result.output.contains("http://example.com/excluded")

// one iteration per Gradle version under test
where:
gradleVersion << GRADLE_VERSIONS
}

/**
 * Functional test for the hostsToExclude option: runs the htmlSanityCheck task
 * against an HTML file that links to an excluded host and expects the task to
 * succeed without any URL of that host showing up in the build output.
 * Executed once for every entry of GRADLE_VERSIONS.
 */
@Unroll
def "can exclude specific hosts with hostsToExclude and Gradle version #gradleVersion"() {
// HTML with one link to excluded.com and one to included.com; the build file
// excludes the host excluded.com.
// NOTE(review): presumably createBuildFile places the snippet inside the
// htmlSanityCheck configuration of the generated build -- confirm.
given:
htmlFile << VALID_HTML_WITH_EXCLUDED_HOST
createBuildFile("""
hostsToExclude = ['excluded.com']
""")

// build() is used (not buildAndFail()), so the build is expected to succeed
when:
def result = runnerForHtmlSanityCheckTask(gradleVersion as String).build()

// the task succeeded and no URL on the excluded host is mentioned in the output
then:
result.task(":htmlSanityCheck").outcome == SUCCESS
!result.output.contains("http://excluded.com")

// one iteration per Gradle version under test
where:
gradleVersion << GRADLE_VERSIONS
}

private GradleRunner runnerForHtmlSanityCheckTask(String gradleVersion) {
GradleRunner.create()
.withGradleVersion(gradleVersion)
Expand Down
3 changes: 3 additions & 0 deletions self-check/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ htmlSanityCheck {

failOnErrors = true

urlsToExclude = [ "https://www.aim42.org/"]
hostsToExclude = [ "www.aim42.org" ]

logger.quiet "HSC version: ${htmlSanityCheckVersion}"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The purpose of the self-check is to check the HSC documentation itself,
so it should not exclude essential links in the documentation.

Please drop this.

logger.quiet "HSC sourceDir: ${sourceDir.absolutePath}"
logger.quiet "HSC checkingResultsDir: ${checkingResultsDir.absolutePath}"
Expand Down
Loading