From b8518f39add6637d9c93e5acd21f30e5c7007950 Mon Sep 17 00:00:00 2001 From: wackget <136205263+wackget@users.noreply.github.com> Date: Tue, 8 Oct 2024 17:10:12 +0100 Subject: [PATCH 1/2] Exclude `rel=dns-prefetch` links Resolves #1499 --- lychee-lib/src/extract/html/html5ever.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lychee-lib/src/extract/html/html5ever.rs b/lychee-lib/src/extract/html/html5ever.rs index d10ac4e00e..e1b7a6794f 100644 --- a/lychee-lib/src/extract/html/html5ever.rs +++ b/lychee-lib/src/extract/html/html5ever.rs @@ -76,10 +76,10 @@ impl TokenSink for LinkExtractor { } } - // Check and exclude rel=preconnect. Other than prefetch and preload, - // preconnect only does DNS lookups and might not be a link to a resource + // Check and exclude `rel=preconnect` and `rel=dns-prefetch`. Unlike `prefetch` and `preload`, + // `preconnect` and `dns-prefetch` only perform DNS lookups and do not necessarily link to a resource if let Some(rel) = attrs.iter().find(|attr| &attr.name.local == "rel") { - if rel.value.contains("preconnect") { + if rel.value.contains("preconnect") || rel.value.contains("dns-prefetch") { return TokenSinkResult::Continue; } } From 9be2544b0c3a572c7f05809ce9e59dd60214ccbd Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 12 Oct 2024 01:34:26 +0200 Subject: [PATCH 2/2] Add tests for dns-prefetch --- lychee-lib/src/extract/html/html5ever.rs | 20 ++++++++++++++++ lychee-lib/src/extract/html/html5gum.rs | 29 ++++++++++++++++++++---- 2 files changed, 45 insertions(+), 4 deletions(-) diff --git a/lychee-lib/src/extract/html/html5ever.rs b/lychee-lib/src/extract/html/html5ever.rs index e1b7a6794f..3a601287ae 100644 --- a/lychee-lib/src/extract/html/html5ever.rs +++ b/lychee-lib/src/extract/html/html5ever.rs @@ -413,4 +413,24 @@ mod tests { let uris = extract_html(input, false); assert!(uris.is_empty()); } + + #[test] + fn test_skip_dns_prefetch() { + let input = r#" + <link rel="dns-prefetch" href="https://example.com"> + "#; + + let uris = extract_html(input, 
false); + assert!(uris.is_empty()); + } + + #[test] + fn test_skip_dns_prefetch_reverse_order() { + let input = r#" + <link href="https://example.com" rel="dns-prefetch"> + "#; + + let uris = extract_html(input, false); + assert!(uris.is_empty()); + } } diff --git a/lychee-lib/src/extract/html/html5gum.rs b/lychee-lib/src/extract/html/html5gum.rs index 276d2e0e86..fa5db3f9b3 100644 --- a/lychee-lib/src/extract/html/html5gum.rs +++ b/lychee-lib/src/extract/html/html5gum.rs @@ -151,8 +151,8 @@ impl LinkExtractor { /// Here are the rules for extracting links: /// - If the current element has a `rel=nofollow` attribute, the current attribute /// value is ignored. - /// - If the current element has a `rel=preconnect` attribute, the current attribute - /// value is ignored. + /// - If the current element has a `rel=preconnect` or `rel=dns-prefetch` + /// attribute, the current attribute value is ignored. /// - If the current attribute value is not a URL, it is treated as plain text and /// added to the links vector. /// - If the current attribute name is `id`, the current attribute value is added @@ -170,8 +170,9 @@ impl LinkExtractor { } if self.current_attributes.get("rel").map_or(false, |rel| { - rel.split(',') - .any(|r| r.trim() == "nofollow" || r.trim() == "preconnect") + rel.split(',').any(|r| { + r.trim() == "nofollow" || r.trim() == "preconnect" || r.trim() == "dns-prefetch" + }) }) { self.current_attributes.clear(); return; @@ -607,4 +608,24 @@ mod tests { let uris = extract_html(input, false); assert!(uris.is_empty()); } + + #[test] + fn test_skip_dns_prefetch() { + let input = r#" + <link rel="dns-prefetch" href="https://example.com"> + "#; + + let uris = extract_html(input, false); + assert!(uris.is_empty()); + } + + #[test] + fn test_skip_dns_prefetch_reverse_order() { + let input = r#" + <link href="https://example.com" rel="dns-prefetch"> + "#; + + let uris = extract_html(input, false); + assert!(uris.is_empty()); + } }