From 1c53adf33cc0ee9f09e921e070ae9560e9a8e3ff Mon Sep 17 00:00:00 2001 From: Eleonor Bengtsson Date: Tue, 14 Jan 2025 09:23:48 +0100 Subject: [PATCH] Implement issue #276 exclude certain URLs from checking, also implement hosts to exclude --- docToolchainConfig.groovy | 2 + .../aim42/htmlsanitycheck/Configuration.java | 3 ++ .../check/BrokenHttpLinksChecker.java | 23 +++++++++++ .../gradle/HtmlSanityCheckTask.groovy | 10 +++++ .../gradle/HtmlSanityCheckBaseSpec.groovy | 16 ++++++++ .../HtmlSanityCheckTaskFunctionalSpec.groovy | 38 +++++++++++++++++++ self-check/build.gradle | 3 ++ 7 files changed, 95 insertions(+) diff --git a/docToolchainConfig.groovy b/docToolchainConfig.groovy index e60a992f..1c85aa2d 100644 --- a/docToolchainConfig.groovy +++ b/docToolchainConfig.groovy @@ -254,6 +254,8 @@ exportEA.with { htmlSanityCheck.with { sourceDir = 'microsite/output' resultsFolder = 'html-sanity-check' + //urlsToExclude = ['http://example.com/excluded', 'http://example.com/excluded2'] + //hostsToExclude = ['example2.com', 'example3', 'example4'] } //end::htmlSanityCheckConfig[] diff --git a/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/Configuration.java b/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/Configuration.java index a50b03b0..f2fdc22d 100644 --- a/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/Configuration.java +++ b/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/Configuration.java @@ -45,6 +45,9 @@ public class Configuration { @Getter(AccessLevel.NONE) @Builder.Default Boolean ignoreIPAddresses = false; + Set urlsToExclude; + Set hostsToExclude; + /* * Explanation for configuring http status codes: * The standard http status codes are defined in class @link NetUtil and can diff --git a/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/check/BrokenHttpLinksChecker.java b/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/check/BrokenHttpLinksChecker.java index 5ff2ad56..0a61a575 
100644 --- a/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/check/BrokenHttpLinksChecker.java +++ b/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/check/BrokenHttpLinksChecker.java @@ -37,6 +37,8 @@ class BrokenHttpLinksChecker extends Checker { // need that to calculate "nrOfOccurrences" // the pure http/https-hrefs a set, duplicates are removed here private Set hrefSet; + private Set urlsToExclude; + private Set hostsToExclude; BrokenHttpLinksChecker(Configuration pConfig) { @@ -45,6 +47,8 @@ class BrokenHttpLinksChecker extends Checker { errorCodes = getMyConfig().getHttpErrorCodes(); warningCodes = getMyConfig().getHttpWarningCodes(); successCodes = getMyConfig().getHttpSuccessCodes(); + urlsToExclude = getMyConfig().getUrlsToExclude(); + hostsToExclude = getMyConfig().getHostsToExclude(); } @Override @@ -101,6 +105,25 @@ private void checkAllHttpLinks() { protected void doubleCheckSingleHttpLink(String href) { + if (urlsToExclude != null && urlsToExclude.contains(href)) { + // Skip checking this URL + return; + } + + // Check if the host of the URL is in the hostsToExclude list + try { + URL url = new URL(href); + String host = url.getHost(); + if (hostsToExclude != null && hostsToExclude.contains(host)) { + // Skip checking this URL + return; + } + } catch (MalformedURLException e) { + // Handle the exception if the URL is malformed + Finding malformedURLFinding = new Finding("malformed URL exception with href=" + href); + getCheckingResults().addFinding(malformedURLFinding); + return; + } // bookkeeping: getCheckingResults().incNrOfChecks(); diff --git a/htmlSanityCheck-gradle-plugin/src/main/groovy/org/aim42/htmlsanitycheck/gradle/HtmlSanityCheckTask.groovy b/htmlSanityCheck-gradle-plugin/src/main/groovy/org/aim42/htmlsanitycheck/gradle/HtmlSanityCheckTask.groovy index e5be7cfd..5a82f13b 100644 --- a/htmlSanityCheck-gradle-plugin/src/main/groovy/org/aim42/htmlsanitycheck/gradle/HtmlSanityCheckTask.groovy +++ 
b/htmlSanityCheck-gradle-plugin/src/main/groovy/org/aim42/htmlsanitycheck/gradle/HtmlSanityCheckTask.groovy @@ -79,6 +79,12 @@ class HtmlSanityCheckTask extends DefaultTask { @Optional @Input Set httpSuccessCodes + @Optional + @Input + Set urlsToExclude + @Optional + @Input + Set hostsToExclude @Input List> checkerClasses = AllCheckers.CHECKER_CLASSES @@ -187,6 +193,8 @@ See ${checkingResultsDir} for a detailed report.""" .ignoreIPAddresses(ignoreIPAddresses) .checksToExecute(checkerClasses) + .urlsToExclude(urlsToExclude) + .hostsToExclude(hostsToExclude) .build() // in case we have configured specific interpretations of http status codes @@ -212,6 +220,8 @@ See ${checkingResultsDir} for a detailed report.""" logger.info "Results dir : $checkingResultsDir" logger.info "JUnit dir : $junitResultsDir" logger.info "Fail on errors : $failOnErrors" + logger.info "Urls to Exclude : $urlsToExclude" + logger.info "Hosts to Exclude: $hostsToExclude" } } diff --git a/htmlSanityCheck-gradle-plugin/src/test/groovy/org/aim42/htmlsanitycheck/gradle/HtmlSanityCheckBaseSpec.groovy b/htmlSanityCheck-gradle-plugin/src/test/groovy/org/aim42/htmlsanitycheck/gradle/HtmlSanityCheckBaseSpec.groovy index 8ef683a8..bd5bede2 100644 --- a/htmlSanityCheck-gradle-plugin/src/test/groovy/org/aim42/htmlsanitycheck/gradle/HtmlSanityCheckBaseSpec.groovy +++ b/htmlSanityCheck-gradle-plugin/src/test/groovy/org/aim42/htmlsanitycheck/gradle/HtmlSanityCheckBaseSpec.groovy @@ -7,6 +7,22 @@ import spock.lang.Specification class HtmlSanityCheckBaseSpec extends Specification { final static VALID_HTML = """""" final static INVALID_HTML = """ """ + final static VALID_HTML_WITH_EXCLUDED_URL = """ + + + Excluded URL + Included URL + + + """ + final static VALID_HTML_WITH_EXCLUDED_HOST = """ + + + Excluded Host + Included Host + + + """ @Rule TemporaryFolder testProjectDir = new TemporaryFolder() diff --git 
a/htmlSanityCheck-gradle-plugin/src/test/groovy/org/aim42/htmlsanitycheck/gradle/HtmlSanityCheckTaskFunctionalSpec.groovy b/htmlSanityCheck-gradle-plugin/src/test/groovy/org/aim42/htmlsanitycheck/gradle/HtmlSanityCheckTaskFunctionalSpec.groovy index f7ca858a..3aabe4e5 100644 --- a/htmlSanityCheck-gradle-plugin/src/test/groovy/org/aim42/htmlsanitycheck/gradle/HtmlSanityCheckTaskFunctionalSpec.groovy +++ b/htmlSanityCheck-gradle-plugin/src/test/groovy/org/aim42/htmlsanitycheck/gradle/HtmlSanityCheckTaskFunctionalSpec.groovy @@ -113,6 +113,44 @@ class HtmlSanityCheckTaskFunctionalSpec extends HtmlSanityCheckBaseSpec { gradleVersion << GRADLE_VERSIONS } + @Unroll + def "can exclude specific URLs with urlsToExclude and Gradle version #gradleVersion"() { + given: + htmlFile << VALID_HTML_WITH_EXCLUDED_URL + createBuildFile(""" + urlsToExclude = ['http://example.com/excluded'] + """) + + when: + def result = runnerForHtmlSanityCheckTask(gradleVersion as String).build() + + then: + result.task(":htmlSanityCheck").outcome == SUCCESS + !result.output.contains("http://example.com/excluded") + + where: + gradleVersion << GRADLE_VERSIONS + } + + @Unroll + def "can exclude specific hosts with hostsToExclude and Gradle version #gradleVersion"() { + given: + htmlFile << VALID_HTML_WITH_EXCLUDED_HOST + createBuildFile(""" + hostsToExclude = ['excluded.com'] + """) + + when: + def result = runnerForHtmlSanityCheckTask(gradleVersion as String).build() + + then: + result.task(":htmlSanityCheck").outcome == SUCCESS + !result.output.contains("http://excluded.com") + + where: + gradleVersion << GRADLE_VERSIONS + } + private GradleRunner runnerForHtmlSanityCheckTask(String gradleVersion) { GradleRunner.create() .withGradleVersion(gradleVersion) diff --git a/self-check/build.gradle b/self-check/build.gradle index 5f320ddf..e3c2ee71 100644 --- a/self-check/build.gradle +++ b/self-check/build.gradle @@ -11,6 +11,9 @@ htmlSanityCheck { failOnErrors = true + urlsToExclude = [ 
"https://www.aim42.org/"] + hostsToExclude = [ "www.aim42.org" ] + logger.quiet "HSC version: ${htmlSanityCheckVersion}" logger.quiet "HSC sourceDir: ${sourceDir.absolutePath}" logger.quiet "HSC checkingResultsDir: ${checkingResultsDir.absolutePath}"