Skip to content

Commit

Permalink
avoid creating invalid filters for AdGuard cosmetic filter location r…
Browse files Browse the repository at this point in the history
…egexes
  • Loading branch information
antonok-edm committed Oct 5, 2024
1 parent baa7deb commit c3dabb7
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 7 deletions.
38 changes: 31 additions & 7 deletions src/content_blocking.rs
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,9 @@ impl TryFrom<CosmeticFilter> for CbRule {
type Error = CbRuleCreationFailure;

fn try_from(v: CosmeticFilter) -> Result<Self, Self::Error> {
use crate::filters::cosmetic::{CosmeticFilterLocationType, CosmeticFilterMask};
use crate::filters::cosmetic::{
CosmeticFilterLocationType as LocationType, CosmeticFilterMask,
};

if v.action.is_some() {
return Err(CbRuleCreationFailure::CosmeticActionRulesNotSupported);
Expand All @@ -573,28 +575,29 @@ impl TryFrom<CosmeticFilter> for CbRule {
let mut hostnames_vec = vec![];
let mut not_hostnames_vec = vec![];

let mut any_entities = false;
let mut any_unsupported = false;

// Unwrap is okay here - cosmetic rules must have a '#' character
let sharp_index = find_char(b'#', raw_line.as_bytes()).unwrap();
CosmeticFilter::locations_before_sharp(&raw_line, sharp_index).for_each(
|(location_type, location)| match location_type {
CosmeticFilterLocationType::Entity => any_entities = true,
CosmeticFilterLocationType::NotEntity => any_entities = true,
CosmeticFilterLocationType::Hostname => {
LocationType::Entity | LocationType::NotEntity | LocationType::Unsupported => {
any_unsupported = true
}
LocationType::Hostname => {
if let Ok(encoded) = idna::domain_to_ascii(location) {
hostnames_vec.push(encoded);
}
}
CosmeticFilterLocationType::NotHostname => {
LocationType::NotHostname => {
if let Ok(encoded) = idna::domain_to_ascii(location) {
not_hostnames_vec.push(encoded);
}
}
},
);

if any_entities {
if any_unsupported && hostnames_vec.is_empty() && not_hostnames_vec.is_empty() {
return Err(CbRuleCreationFailure::CosmeticEntitiesUnsupported);
}

Expand Down Expand Up @@ -1403,4 +1406,25 @@ mod filterset_tests {

Ok(())
}

#[test]
fn convert_cosmetic_filter_locations() -> Result<(), ()> {
let list = [
r"/^dizipal\d+\.com$/##.web",
r"/^example\d+\.com$/,test.net,b.*##.ad",
];
let mut set = FilterSet::new(true);
set.add_filters(&list, Default::default());

let (cb_rules, used_rules) = set.into_content_blocking()?;
assert_eq!(used_rules.len(), 1);
assert_eq!(cb_rules.len(), 1);
assert!(cb_rules[0].trigger.if_domain.is_some());
assert_eq!(
cb_rules[0].trigger.if_domain.as_ref().unwrap(),
&["test.net"]
);

Ok(())
}
}
25 changes: 25 additions & 0 deletions src/filters/cosmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ pub(crate) enum CosmeticFilterLocationType {
NotEntity,
Hostname,
NotHostname,
Unsupported,
}

/// Contains hashes of all of the comma separated location items that were populated before the
Expand Down Expand Up @@ -186,6 +187,10 @@ impl CosmeticFilter {
hostname.len()
};
let location = &hostname[start..end];
// AdGuard regex syntax
if location.starts_with('/') {
return Some((CosmeticFilterLocationType::Unsupported, part));
}
Some(match (negation, entity) {
(true, true) => (CosmeticFilterLocationType::NotEntity, location),
(true, false) => (CosmeticFilterLocationType::NotHostname, location),
Expand Down Expand Up @@ -216,6 +221,7 @@ impl CosmeticFilter {
return Err(CosmeticFilterError::LocationModifiersUnsupported);
}

let mut any_unsupported = false;
for (location_type, location) in Self::locations_before_sharp(line, sharp_index) {
let mut hostname = String::new();
if location.is_ascii() {
Expand All @@ -232,8 +238,20 @@ impl CosmeticFilter {
CosmeticFilterLocationType::NotHostname => not_hostnames_vec.push(hash),
CosmeticFilterLocationType::Entity => entities_vec.push(hash),
CosmeticFilterLocationType::Hostname => hostnames_vec.push(hash),
CosmeticFilterLocationType::Unsupported => {
any_unsupported = true;
}
}
}
// Discard the rule altogether if there are no supported location types
if any_unsupported
&& hostnames_vec.is_empty()
&& entities_vec.is_empty()
&& not_hostnames_vec.is_empty()
&& not_entities_vec.is_empty()
{
return Err(CosmeticFilterError::UnsupportedSyntax);
}

/// Sorts `vec` and wraps it in `Some` if it's not empty, or returns `None` if it is.
#[inline]
Expand Down Expand Up @@ -2167,6 +2185,13 @@ mod matching_tests {
assert!(parse_cf(r#"​##a[href^="https://www.g2fame.com/"] > img"#).is_err());
}

#[test]
fn adg_regex() {
assert!(parse_cf(r"/^dizipal\d+\.com$/##.web").is_err());
// Filter is still salvageable if at least one location is supported
assert!(parse_cf(r"/^dizipal\d+\.com,test.net$/##.web").is_ok());
}

#[test]
#[cfg(feature = "css-validation")]
fn abp_has_conversion() {
Expand Down

0 comments on commit c3dabb7

Please sign in to comment.