From c3dabb7299797d46cfb441c35692f3f9f9a192e5 Mon Sep 17 00:00:00 2001 From: Anton Lazarev Date: Sat, 5 Oct 2024 04:18:15 -0700 Subject: [PATCH] avoid creating invalid filters for AdGuard cosmetic filter location regexes --- src/content_blocking.rs | 38 +++++++++++++++++++++++++++++++------- src/filters/cosmetic.rs | 25 +++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 7 deletions(-) diff --git a/src/content_blocking.rs b/src/content_blocking.rs index 7702971a..ecbf5dd9 100644 --- a/src/content_blocking.rs +++ b/src/content_blocking.rs @@ -560,7 +560,9 @@ impl TryFrom for CbRule { type Error = CbRuleCreationFailure; fn try_from(v: CosmeticFilter) -> Result { - use crate::filters::cosmetic::{CosmeticFilterLocationType, CosmeticFilterMask}; + use crate::filters::cosmetic::{ + CosmeticFilterLocationType as LocationType, CosmeticFilterMask, + }; if v.action.is_some() { return Err(CbRuleCreationFailure::CosmeticActionRulesNotSupported); @@ -573,20 +575,21 @@ impl TryFrom for CbRule { let mut hostnames_vec = vec![]; let mut not_hostnames_vec = vec![]; - let mut any_entities = false; + let mut any_unsupported = false; // Unwrap is okay here - cosmetic rules must have a '#' character let sharp_index = find_char(b'#', raw_line.as_bytes()).unwrap(); CosmeticFilter::locations_before_sharp(&raw_line, sharp_index).for_each( |(location_type, location)| match location_type { - CosmeticFilterLocationType::Entity => any_entities = true, - CosmeticFilterLocationType::NotEntity => any_entities = true, - CosmeticFilterLocationType::Hostname => { + LocationType::Entity | LocationType::NotEntity | LocationType::Unsupported => { + any_unsupported = true + } + LocationType::Hostname => { if let Ok(encoded) = idna::domain_to_ascii(location) { hostnames_vec.push(encoded); } } - CosmeticFilterLocationType::NotHostname => { + LocationType::NotHostname => { if let Ok(encoded) = idna::domain_to_ascii(location) { not_hostnames_vec.push(encoded); } @@ -594,7 +597,7 @@ impl TryFrom for CbRule { }, ); - if any_entities { + if any_unsupported && hostnames_vec.is_empty() && not_hostnames_vec.is_empty() { return Err(CbRuleCreationFailure::CosmeticEntitiesUnsupported); } @@ -1403,4 +1406,25 @@ mod filterset_tests { Ok(()) } + + #[test] + fn convert_cosmetic_filter_locations() -> Result<(), ()> { + let list = [ + r"/^dizipal\d+\.com$/##.web", + r"/^example\d+\.com$/,test.net,b.*##.ad", + ]; + let mut set = FilterSet::new(true); + set.add_filters(&list, Default::default()); + + let (cb_rules, used_rules) = set.into_content_blocking()?; + assert_eq!(used_rules.len(), 1); + assert_eq!(cb_rules.len(), 1); + assert!(cb_rules[0].trigger.if_domain.is_some()); + assert_eq!( + cb_rules[0].trigger.if_domain.as_ref().unwrap(), + &["test.net"] + ); + + Ok(()) + } } diff --git a/src/filters/cosmetic.rs b/src/filters/cosmetic.rs index e3675fa9..c3d8be8e 100644 --- a/src/filters/cosmetic.rs +++ b/src/filters/cosmetic.rs @@ -150,6 +150,7 @@ pub(crate) enum CosmeticFilterLocationType { NotEntity, Hostname, NotHostname, + Unsupported, } /// Contains hashes of all of the comma separated location items that were populated before the @@ -186,6 +187,10 @@ impl CosmeticFilter { hostname.len() }; let location = &hostname[start..end]; + // AdGuard regex syntax + if location.starts_with('/') { + return Some((CosmeticFilterLocationType::Unsupported, part)); + } Some(match (negation, entity) { (true, true) => (CosmeticFilterLocationType::NotEntity, location), (true, false) => (CosmeticFilterLocationType::NotHostname, location), @@ -216,6 +221,7 @@ impl CosmeticFilter { return Err(CosmeticFilterError::LocationModifiersUnsupported); } + let mut any_unsupported = false; for (location_type, location) in Self::locations_before_sharp(line, sharp_index) { let mut hostname = String::new(); if location.is_ascii() { @@ -232,8 +238,20 @@ impl CosmeticFilter { CosmeticFilterLocationType::NotHostname => not_hostnames_vec.push(hash), CosmeticFilterLocationType::Entity => entities_vec.push(hash), CosmeticFilterLocationType::Hostname => hostnames_vec.push(hash), + CosmeticFilterLocationType::Unsupported => { + any_unsupported = true; + } } } + // Discard the rule altogether if there are no supported location types + if any_unsupported + && hostnames_vec.is_empty() + && entities_vec.is_empty() + && not_hostnames_vec.is_empty() + && not_entities_vec.is_empty() + { + return Err(CosmeticFilterError::UnsupportedSyntax); + } /// Sorts `vec` and wraps it in `Some` if it's not empty, or returns `None` if it is. #[inline] @@ -2167,6 +2185,13 @@ mod matching_tests { assert!(parse_cf(r#"​##a[href^="https://www.g2fame.com/"] > img"#).is_err()); } + #[test] + fn adg_regex() { + assert!(parse_cf(r"/^dizipal\d+\.com$/##.web").is_err()); + // Filter is still salvageable if at least one location is supported + assert!(parse_cf(r"/^dizipal\d+\.com,test.net$/##.web").is_ok()); + } + #[test] #[cfg(feature = "css-validation")] fn abp_has_conversion() {