Skip to content

Commit

Permalink
Merge pull request #210 from brave/list-metadata-parsing
Browse files Browse the repository at this point in the history
Support parsing ABP special comment list metadata
  • Loading branch information
antonok-edm authored Apr 28, 2022
2 parents d390a3c + 2c3f4b6 commit 52fed94
Show file tree
Hide file tree
Showing 2 changed files with 204 additions and 13 deletions.
15 changes: 10 additions & 5 deletions native/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::cell::RefCell;
use std::sync::Mutex;
use std::path::Path;
use adblock::engine::Engine as EngineInternal;
use adblock::lists::{RuleTypes, FilterFormat, FilterSet as FilterSetInternal, ParseOptions};
use adblock::lists::{RuleTypes, FilterFormat, FilterListMetadata, FilterSet as FilterSetInternal, ParseOptions};
use adblock::resources::Resource;
use adblock::resources::resource_assembler::{assemble_web_accessible_resources, assemble_scriptlet_resources};

Expand All @@ -19,7 +19,7 @@ impl FilterSet {
fn new(debug: bool) -> Self {
Self(RefCell::new(FilterSetInternal::new(debug)))
}
fn add_filters(&self, rules: &[String], opts: ParseOptions) {
fn add_filters(&self, rules: &[String], opts: ParseOptions) -> FilterListMetadata {
self.0.borrow_mut().add_filters(rules, opts)
}
fn add_filter(&self, filter: &str, opts: ParseOptions) -> Result<(), adblock::lists::FilterParseError> {
Expand All @@ -42,7 +42,7 @@ fn create_filter_set(mut cx: FunctionContext) -> JsResult<JsBox<FilterSet>> {
}
}

fn filter_set_add_filters(mut cx: FunctionContext) -> JsResult<JsNull> {
fn filter_set_add_filters(mut cx: FunctionContext) -> JsResult<JsValue> {
let this = cx.argument::<JsBox<FilterSet>>(0)?;

// Take the first argument, which must be an array
Expand All @@ -67,9 +67,14 @@ fn filter_set_add_filters(mut cx: FunctionContext) -> JsResult<JsNull> {
rules.push(rule);
}

this.add_filters(&rules, parse_opts);
let metadata = this.add_filters(&rules, parse_opts);

Ok(JsNull::new(&mut cx))
let js_metadata = match neon_serde::to_value(&mut cx, &metadata) {
Ok(v) => v,
Err(e) => cx.throw_error(e.to_string())?,
};

Ok(js_metadata)
}

fn filter_set_add_filter(mut cx: FunctionContext) -> JsResult<JsBoolean> {
Expand Down
202 changes: 194 additions & 8 deletions src/lists.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
//! Parsing functions and collections for handling multiple filter rules.

use std::convert::TryFrom;

use crate::filters::network::{NetworkFilter, NetworkFilterError};
use crate::filters::cosmetic::{CosmeticFilter, CosmeticFilterError};

Expand Down Expand Up @@ -104,6 +106,94 @@ impl Default for FilterSet {
}
}

/// Corresponds to the `expires` field of `FilterListMetadata`.
///
/// Values produced by this type's `TryFrom<&str>` implementation are limited to 1 through 336
/// hours, or 1 through 14 days. The unit written in the list is preserved rather than being
/// normalized, so `24 hours` and `1 day` parse to different variants.
#[derive(Debug, PartialEq, Serialize)]
pub enum ExpiresInterval {
/// An interval expressed in hours, e.g. `! Expires: 8 hours`.
Hours(u16),
/// An interval expressed in days, e.g. `! Expires: 5 days`.
Days(u8),
}

impl TryFrom<&str> for ExpiresInterval {
    type Error = ();

    /// Parses the value part of an `! Expires:` special comment, such as `5 days` or `8 hours`.
    ///
    /// The accepted forms are exactly `1 hour`, `1 day`, `N hours` with `N` in `2..=336`, and
    /// `N days` with `N` in `2..=14`. Everything else yields `Err(())`: a leading `+`, a
    /// singular/plural suffix that does not match the count, fractional or negative numbers,
    /// and any trailing text after the unit.
    fn try_from(v: &str) -> Result<Self, ()> {
        const DAYS_MAX: u8 = 14;
        const HOURS_MAX: u16 = DAYS_MAX as u16 * 24;

        // Integer parsing in `str::parse` tolerates a leading plus sign; reject it up front.
        if v.starts_with('+') {
            return Err(());
        }

        match v {
            // The singular suffix is only valid for a count of exactly one.
            "1 hour" => Ok(Self::Hours(1)),
            "1 day" => Ok(Self::Days(1)),
            // The plural suffixes cover the remainder of the range, [2, MAX].
            _ => {
                if let Some(count) = v.strip_suffix(" hours") {
                    count
                        .parse::<u16>()
                        .ok()
                        .filter(|hours| (2..=HOURS_MAX).contains(hours))
                        .map(Self::Hours)
                        .ok_or(())
                } else if let Some(count) = v.strip_suffix(" days") {
                    count
                        .parse::<u8>()
                        .ok()
                        .filter(|days| (2..=DAYS_MAX).contains(days))
                        .map(Self::Days)
                        .ok_or(())
                } else {
                    Err(())
                }
            }
        }
    }
}

/// Includes information about any "special comments" as described by
/// <https://help.eyeo.com/adblockplus/how-to-write-filters#special-comments>.
///
/// Every field is `None` until a matching special comment has been seen. Fields are public so
/// that Rust callers receiving this struct from the list-parsing functions can read the values
/// directly, rather than only through serialization.
#[derive(Debug, Default, PartialEq, Serialize)]
pub struct FilterListMetadata {
    /// `! Homepage: http://example.com` - This comment determines which webpage should be linked
    /// as filter list homepage.
    pub homepage: Option<String>,
    /// `! Title: FooList` - This comment sets a fixed title for the filter list. If this comment
    /// is present, the user is no longer able to change the title.
    pub title: Option<String>,
    /// `! Expires: 5 days` - This comment sets the update interval for the filter list. The value
    /// can be given in days (e.g. 5 days) or hours (e.g. 8 hours). Any value between 1 hour and 14
    /// days is possible. Note that the update will not necessarily happen after this time
    /// interval. The actual update time is slightly randomized and depends on some additional
    /// factors to reduce server load.
    pub expires: Option<ExpiresInterval>,
    /// `! Redirect: http://example.com/list.txt` - This comment indicates that the filter list has
    /// moved to a new download address. Adblock Plus ignores any file content beyond that comment
    /// and immediately tries downloading from the new address. In case of success, the address of
    /// the filter list is updated in the settings. This comment is ignored if the new address is
    /// the same as the current address, meaning that it can be used to enforce the "canonical"
    /// address of the filter list.
    pub redirect: Option<String>,
}

impl FilterListMetadata {
    /// Inspects one line of a filter list and records it if it is a recognized metadata comment.
    ///
    /// A metadata comment has the exact shape `! Key: value`, where `Key` is one of `Homepage`,
    /// `Title`, `Expires` or `Redirect` (case-sensitive). Lines of any other shape are ignored.
    /// The first value recorded for a key wins; later occurrences never overwrite it. An
    /// `Expires` value that fails to parse is skipped, so a later valid one can still be stored.
    fn try_add(&mut self, line: &str) {
        /// Stores `value` in `slot` unless something has already been recorded there.
        fn set_once(slot: &mut Option<String>, value: &str) {
            if slot.is_none() {
                *slot = Some(value.to_string());
            }
        }

        // Split `! Key: value` into its two halves, bailing out on any other kind of line.
        let (key, value) = match line
            .strip_prefix("! ")
            .and_then(|comment| comment.split_once(": "))
        {
            Some(pair) => pair,
            None => return,
        };

        match key {
            "Homepage" => set_once(&mut self.homepage, value),
            "Title" => set_once(&mut self.title, value),
            "Redirect" => set_once(&mut self.redirect, value),
            "Expires" => {
                if self.expires.is_none() {
                    // A failed parse leaves the field as `None`.
                    self.expires = ExpiresInterval::try_from(value).ok();
                }
            }
            _ => {}
        }
    }
}

impl FilterSet {
/// Creates a new `FilterSet`. `debug` specifies whether or not to save information about the
/// original raw filter rules alongside the more compact internal representation. If enabled,
Expand All @@ -117,18 +207,20 @@ impl FilterSet {
}

/// Adds the contents of an entire filter list to this `FilterSet`. Filters that cannot be
/// parsed successfully are ignored.
pub fn add_filter_list(&mut self, filter_list: &str, opts: ParseOptions) {
/// parsed successfully are ignored. Returns any discovered metadata about the list of rules
/// added.
pub fn add_filter_list(&mut self, filter_list: &str, opts: ParseOptions) -> FilterListMetadata {
let rules = filter_list.lines().map(str::to_string).collect::<Vec<_>>();
self.add_filters(&rules, opts);
self.add_filters(&rules, opts)
}

/// Adds a collection of filter rules to this `FilterSet`. Filters that cannot be parsed
/// successfully are ignored.
pub fn add_filters(&mut self, filters: &[String], opts: ParseOptions) {
let (mut parsed_network_filters, mut parsed_cosmetic_filters) = parse_filters(&filters, self.debug, opts);
/// successfully are ignored. Returns any discovered metadata about the list of rules added.
pub fn add_filters(&mut self, filters: &[String], opts: ParseOptions) -> FilterListMetadata {
let (metadata, mut parsed_network_filters, mut parsed_cosmetic_filters) = parse_filters_with_metadata(&filters, self.debug, opts);
self.network_filters.append(&mut parsed_network_filters);
self.cosmetic_filters.append(&mut parsed_cosmetic_filters);
metadata
}

/// Adds the string representation of a single filter rule to this `FilterSet`.
Expand Down Expand Up @@ -346,17 +438,37 @@ pub fn parse_filters(
debug: bool,
opts: ParseOptions,
) -> (Vec<NetworkFilter>, Vec<CosmeticFilter>) {
let (_metadata, network_filters, cosmetic_filters) = parse_filters_with_metadata(
list,
debug,
opts,
);

(network_filters, cosmetic_filters)
}

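/// Parse an entire list of filters, ignoring any errors. Also collects list metadata from any
/// recognized special comments and returns it alongside the parsed filters.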
pub fn parse_filters_with_metadata(
list: &[String],
debug: bool,
opts: ParseOptions,
) -> (FilterListMetadata, Vec<NetworkFilter>, Vec<CosmeticFilter>) {
let mut metadata = FilterListMetadata::default();

let list_iter = list.iter();

let (network_filters, cosmetic_filters): (Vec<_>, Vec<_>) = list_iter
.map(|line| parse_filter(line, debug, opts))
.map(|line| {
metadata.try_add(line);
parse_filter(line, debug, opts)
})
.filter_map(Result::ok)
.partition_map(|filter| match filter {
ParsedFilter::Network(f) => Either::Left(f),
ParsedFilter::Cosmetic(f) => Either::Right(f),
});

(network_filters, cosmetic_filters)
(metadata, network_filters, cosmetic_filters)
}

/// Given a single line, checks if this would likely be a cosmetic filter, a
Expand Down Expand Up @@ -522,4 +634,78 @@ mod tests {
Default::default(),
).is_err());
}

#[test]
fn test_parse_expires_interval() {
    // Each entry pairs a raw `Expires` value with the result the parser must produce for it.
    let cases = [
        // Hours: singular only for 1, plural for 2 up to 336 (14 days).
        ("0 hour", Err(())),
        ("0 hours", Err(())),
        ("1 hour", Ok(ExpiresInterval::Hours(1))),
        ("1 hours", Err(())),
        ("2 hours", Ok(ExpiresInterval::Hours(2))),
        ("2 hour", Err(())),
        ("3.5 hours", Err(())),
        ("336 hours", Ok(ExpiresInterval::Hours(336))),
        ("337 hours", Err(())),
        // Days: singular only for 1, plural for 2 up to 14.
        ("0 day", Err(())),
        ("0 days", Err(())),
        ("1 day", Ok(ExpiresInterval::Days(1))),
        ("1 days", Err(())),
        ("2 days", Ok(ExpiresInterval::Days(2))),
        ("2 day", Err(())),
        ("3.5 days", Err(())),
        ("14 days", Ok(ExpiresInterval::Days(14))),
        ("15 days", Err(())),
        // Explicit signs are never accepted.
        ("-5 hours", Err(())),
        ("+5 hours", Err(())),
    ];

    for (input, expected) in cases.iter() {
        assert_eq!(
            &ExpiresInterval::try_from(*input),
            expected,
            "unexpected result for {:?}",
            input
        );
    }
}

#[test]
/// A well-formed list header: `Title`, `Homepage` and `Expires` are picked up, while unrelated
/// comments (`Licence`, `Version`, the `=>` line) are ignored and `Redirect` stays unset.
fn test_parsing_list_metadata() {
    let list: Vec<String> = [
        "[Adblock Plus 2.0]",
        "! Title: 0131 Block List",
        "! Homepage: https://austinhuang.me/0131-block-list",
        "! Licence: https://creativecommons.org/licenses/by-sa/4.0/",
        "! Expires: 7 days",
        "! Version: 20220411",
        "",
        "! => https://austinhuang.me/0131-block-list/list.txt",
    ]
    .iter()
    .map(|line| String::from(*line))
    .collect();

    let mut filter_set = FilterSet::new(false);
    let metadata = filter_set.add_filters(&list, ParseOptions::default());

    assert_eq!(metadata.title.as_deref(), Some("0131 Block List"));
    assert_eq!(
        metadata.homepage.as_deref(),
        Some("https://austinhuang.me/0131-block-list")
    );
    assert_eq!(metadata.expires, Some(ExpiresInterval::Days(7)));
    assert!(metadata.redirect.is_none());
}

#[test]
/// Not every list follows the expected header layout. This one has decorative separator
/// comments, a dotted version string, and an `Expires` value followed by extra text. The
/// well-formed fields must still be extracted, while the malformed `Expires` is dropped.
fn test_parsing_list_best_effort() {
    let list: Vec<String> = [
        "[Adblock Plus 2]",
        "!-----------------------------------",
        "! ABOUT",
        "!-----------------------------------",
        "! Version: 1.2.0.0",
        "! Title: ABPVN Advanced",
        "! Last modified: 09/03/2021",
        "! Expires: 7 days (update frequency)",
        "! Homepage: https://www.haopro.net/",
    ]
    .iter()
    .map(|line| String::from(*line))
    .collect();

    let mut filter_set = FilterSet::new(false);
    let metadata = filter_set.add_filters(&list, ParseOptions::default());

    assert_eq!(metadata.title.as_deref(), Some("ABPVN Advanced"));
    assert_eq!(metadata.homepage.as_deref(), Some("https://www.haopro.net/"));
    assert!(metadata.expires.is_none());
    assert!(metadata.redirect.is_none());
}
}

0 comments on commit 52fed94

Please sign in to comment.