diff --git a/native/src/lib.rs b/native/src/lib.rs index 698e34dd..28583018 100644 --- a/native/src/lib.rs +++ b/native/src/lib.rs @@ -4,7 +4,7 @@ use std::cell::RefCell; use std::sync::Mutex; use std::path::Path; use adblock::engine::Engine as EngineInternal; -use adblock::lists::{RuleTypes, FilterFormat, FilterSet as FilterSetInternal, ParseOptions}; +use adblock::lists::{RuleTypes, FilterFormat, FilterListMetadata, FilterSet as FilterSetInternal, ParseOptions}; use adblock::resources::Resource; use adblock::resources::resource_assembler::{assemble_web_accessible_resources, assemble_scriptlet_resources}; @@ -19,7 +19,7 @@ impl FilterSet { fn new(debug: bool) -> Self { Self(RefCell::new(FilterSetInternal::new(debug))) } - fn add_filters(&self, rules: &[String], opts: ParseOptions) { + fn add_filters(&self, rules: &[String], opts: ParseOptions) -> FilterListMetadata { self.0.borrow_mut().add_filters(rules, opts) } fn add_filter(&self, filter: &str, opts: ParseOptions) -> Result<(), adblock::lists::FilterParseError> { @@ -42,7 +42,7 @@ fn create_filter_set(mut cx: FunctionContext) -> JsResult> { } } -fn filter_set_add_filters(mut cx: FunctionContext) -> JsResult { +fn filter_set_add_filters(mut cx: FunctionContext) -> JsResult { let this = cx.argument::>(0)?; // Take the first argument, which must be an array @@ -67,9 +67,14 @@ fn filter_set_add_filters(mut cx: FunctionContext) -> JsResult { rules.push(rule); } - this.add_filters(&rules, parse_opts); + let metadata = this.add_filters(&rules, parse_opts); - Ok(JsNull::new(&mut cx)) + let js_metadata = match neon_serde::to_value(&mut cx, &metadata) { + Ok(v) => v, + Err(e) => cx.throw_error(e.to_string())?, + }; + + Ok(js_metadata) } fn filter_set_add_filter(mut cx: FunctionContext) -> JsResult { diff --git a/src/lists.rs b/src/lists.rs index 09a81d18..2298d2ca 100644 --- a/src/lists.rs +++ b/src/lists.rs @@ -1,5 +1,7 @@ //! Parsing functions and collections for handling with multiple filter rules. 
+use std::convert::TryFrom; + use crate::filters::network::{NetworkFilter, NetworkFilterError}; use crate::filters::cosmetic::{CosmeticFilter, CosmeticFilterError}; @@ -104,6 +106,94 @@ impl Default for FilterSet { } } +/// Corresponds to the `expires` field of `FilterListMetadata`. +#[derive(Debug, PartialEq, Serialize)] +pub enum ExpiresInterval { + Hours(u16), + Days(u8), +} + +impl TryFrom<&str> for ExpiresInterval { + type Error = (); + + fn try_from(v: &str) -> Result { + const DAYS_MAX: u8 = 14; + const HOURS_MAX: u16 = DAYS_MAX as u16 * 24; + // str::parse:: accepts a leading plus sign, but we explicitly forbid it here + if v.starts_with('+') { + return Err(()); + // Special case for singular hour or day values + } else if v == "1 hour" { + return Ok(Self::Hours(1)); + } else if v == "1 day" { + return Ok(Self::Days(1)); + // Otherwise accept in the range [2, MAX] for values with a matching suffix + } if let Some(numstr) = v.strip_suffix(" hours") { + let num = numstr.parse::().map_err(|_| ())?; + if num < 2 || num > HOURS_MAX { + return Err(()); + } + Ok(Self::Hours(num)) + } else if let Some(numstr) = v.strip_suffix(" days") { + let num = numstr.parse::().map_err(|_| ())?; + if num < 2 || num > DAYS_MAX { + return Err(()); + } + Ok(Self::Days(num)) + } else { + Err(()) + } + } +} + +/// Includes information about any "special comments" as described by +/// https://help.eyeo.com/adblockplus/how-to-write-filters#special-comments +#[derive(Default, Serialize)] +pub struct FilterListMetadata { + /// `! Homepage: http://example.com` - This comment determines which webpage should be linked + /// as filter list homepage. + homepage: Option, + /// `! Title: FooList` - This comment sets a fixed title for the filter list. If this comment + /// is present, the user is no longer able to change the title. + title: Option, + /// `! Expires: 5 days` - This comment sets the update interval for the filter list. The value + /// can be given in days (e.g. 
5 days) or hours (e.g. 8 hours). Any value between 1 hour and 14 + /// days is possible. Note that the update will not necessarily happen after this time + /// interval. The actual update time is slightly randomized and depends on some additional + /// factors to reduce server load. + expires: Option, + /// `! Redirect: http://example.com/list.txt` - This comment indicates that the filter list has + /// moved to a new download address. Adblock Plus ignores any file content beyond that comment + /// and immediately tries downloading from the new address. In case of success, the address of + /// the filter list is updated in the settings. This comment is ignored if the new address is + /// the same as the current address, meaning that it can be used to enforce the "canonical" + /// address of the filter list. + redirect: Option, +} + +impl FilterListMetadata { + /// Attempts to add a line of a filter list to this collection of metadata. Only comment lines + /// with valid metadata content will be added. Previously added information will not be + /// rewritten. + fn try_add(&mut self, line: &str) { + if let Some(kv) = line.strip_prefix("! ") { + if let Some((key, value)) = kv.split_once(": ") { + match key { + "Homepage" if self.homepage == None => self.homepage = Some(value.to_string()), + "Title" if self.title == None => self.title = Some(value.to_string()), + "Expires" if self.expires == None => { + if let Ok(expires) = ExpiresInterval::try_from(value) { + self.expires = Some(expires); + } + } + "Redirect" if self.redirect == None => self.redirect = Some(value.to_string()), + _ => (), + } + } + } + } +} + impl FilterSet { /// Creates a new `FilterSet`. `debug` specifies whether or not to save information about the /// original raw filter rules alongside the more compact internal representation. If enabled, @@ -117,18 +207,20 @@ impl FilterSet { } /// Adds the contents of an entire filter list to this `FilterSet`. 
Filters that cannot be - /// parsed successfully are ignored. - pub fn add_filter_list(&mut self, filter_list: &str, opts: ParseOptions) { + /// parsed successfully are ignored. Returns any discovered metadata about the list of rules + /// added. + pub fn add_filter_list(&mut self, filter_list: &str, opts: ParseOptions) -> FilterListMetadata { let rules = filter_list.lines().map(str::to_string).collect::>(); - self.add_filters(&rules, opts); + self.add_filters(&rules, opts) } /// Adds a collection of filter rules to this `FilterSet`. Filters that cannot be parsed - /// successfully are ignored. - pub fn add_filters(&mut self, filters: &[String], opts: ParseOptions) { - let (mut parsed_network_filters, mut parsed_cosmetic_filters) = parse_filters(&filters, self.debug, opts); + /// successfully are ignored. Returns any discovered metadata about the list of rules added. + pub fn add_filters(&mut self, filters: &[String], opts: ParseOptions) -> FilterListMetadata { + let (metadata, mut parsed_network_filters, mut parsed_cosmetic_filters) = parse_filters_with_metadata(&filters, self.debug, opts); self.network_filters.append(&mut parsed_network_filters); self.cosmetic_filters.append(&mut parsed_cosmetic_filters); + metadata } /// Adds the string representation of a single filter rule to this `FilterSet`. 
@@ -346,17 +438,37 @@ pub fn parse_filters( debug: bool, opts: ParseOptions, ) -> (Vec, Vec) { + let (_metadata, network_filters, cosmetic_filters) = parse_filters_with_metadata( + list, + debug, + opts, + ); + + (network_filters, cosmetic_filters) +} + +/// Parse an entire list of filters, ignoring any errors +pub fn parse_filters_with_metadata( + list: &[String], + debug: bool, + opts: ParseOptions, +) -> (FilterListMetadata, Vec, Vec) { + let mut metadata = FilterListMetadata::default(); + let list_iter = list.iter(); let (network_filters, cosmetic_filters): (Vec<_>, Vec<_>) = list_iter - .map(|line| parse_filter(line, debug, opts)) + .map(|line| { + metadata.try_add(line); + parse_filter(line, debug, opts) + }) .filter_map(Result::ok) .partition_map(|filter| match filter { ParsedFilter::Network(f) => Either::Left(f), ParsedFilter::Cosmetic(f) => Either::Right(f), }); - (network_filters, cosmetic_filters) + (metadata, network_filters, cosmetic_filters) } /// Given a single line, checks if this would likely be a cosmetic filter, a @@ -522,4 +634,78 @@ mod tests { Default::default(), ).is_err()); } + + #[test] + fn test_parse_expires_interval() { + assert_eq!(ExpiresInterval::try_from("0 hour"), Err(())); + assert_eq!(ExpiresInterval::try_from("0 hours"), Err(())); + assert_eq!(ExpiresInterval::try_from("1 hour"), Ok(ExpiresInterval::Hours(1))); + assert_eq!(ExpiresInterval::try_from("1 hours"), Err(())); + assert_eq!(ExpiresInterval::try_from("2 hours"), Ok(ExpiresInterval::Hours(2))); + assert_eq!(ExpiresInterval::try_from("2 hour"), Err(())); + assert_eq!(ExpiresInterval::try_from("3.5 hours"), Err(())); + assert_eq!(ExpiresInterval::try_from("336 hours"), Ok(ExpiresInterval::Hours(336))); + assert_eq!(ExpiresInterval::try_from("337 hours"), Err(())); + + assert_eq!(ExpiresInterval::try_from("0 day"), Err(())); + assert_eq!(ExpiresInterval::try_from("0 days"), Err(())); + assert_eq!(ExpiresInterval::try_from("1 day"), Ok(ExpiresInterval::Days(1))); + 
/// Checks the accepted and rejected spellings of `Expires` values, including the singular
/// forms, the upper bounds for each unit, fractional values and explicit signs.
#[test]
fn test_parse_expires_interval() {
    let cases: &[(&str, Result<ExpiresInterval, ()>)] = &[
        ("0 hour", Err(())),
        ("0 hours", Err(())),
        ("1 hour", Ok(ExpiresInterval::Hours(1))),
        ("1 hours", Err(())),
        ("2 hours", Ok(ExpiresInterval::Hours(2))),
        ("2 hour", Err(())),
        ("3.5 hours", Err(())),
        ("336 hours", Ok(ExpiresInterval::Hours(336))),
        ("337 hours", Err(())),
        ("0 day", Err(())),
        ("0 days", Err(())),
        ("1 day", Ok(ExpiresInterval::Days(1))),
        ("1 days", Err(())),
        ("2 days", Ok(ExpiresInterval::Days(2))),
        ("2 day", Err(())),
        ("3.5 days", Err(())),
        ("14 days", Ok(ExpiresInterval::Days(14))),
        ("15 days", Err(())),
        ("-5 hours", Err(())),
        ("+5 hours", Err(())),
    ];

    for (input, expected) in cases.iter() {
        assert_eq!(&ExpiresInterval::try_from(*input), expected);
    }
}

/// A typical list header: recognized keys are captured, unknown keys (`Licence`, `Version`) and
/// non-metadata comments are ignored.
#[test]
fn test_parsing_list_metadata() {
    let list: Vec<String> = [
        "[Adblock Plus 2.0]",
        "! Title: 0131 Block List",
        "! Homepage: https://austinhuang.me/0131-block-list",
        "! Licence: https://creativecommons.org/licenses/by-sa/4.0/",
        "! Expires: 7 days",
        "! Version: 20220411",
        "",
        "! => https://austinhuang.me/0131-block-list/list.txt",
    ]
    .iter()
    .map(|line| line.to_string())
    .collect();

    let mut filter_set = FilterSet::new(false);
    let metadata = filter_set.add_filters(&list, ParseOptions::default());

    assert_eq!(metadata.title.as_deref(), Some("0131 Block List"));
    assert_eq!(metadata.homepage.as_deref(), Some("https://austinhuang.me/0131-block-list"));
    assert_eq!(metadata.expires, Some(ExpiresInterval::Days(7)));
    assert_eq!(metadata.redirect, None);
}

/// Some lists are formatted in unusual ways. This example has a version string with
/// non-numeric characters and an `Expires` field with extra information trailing afterwards.
/// Valid fields should still be recognized and parsed accordingly.
#[test]
fn test_parsing_list_best_effort() {
    let list: Vec<String> = [
        "[Adblock Plus 2]",
        "!-----------------------------------",
        "! ABOUT",
        "!-----------------------------------",
        "! Version: 1.2.0.0",
        "! Title: ABPVN Advanced",
        "! Last modified: 09/03/2021",
        "! Expires: 7 days (update frequency)",
        "! Homepage: https://www.haopro.net/",
    ]
    .iter()
    .map(|line| line.to_string())
    .collect();

    let mut filter_set = FilterSet::new(false);
    let metadata = filter_set.add_filters(&list, ParseOptions::default());

    assert_eq!(metadata.title.as_deref(), Some("ABPVN Advanced"));
    assert_eq!(metadata.homepage.as_deref(), Some("https://www.haopro.net/"));
    assert_eq!(metadata.expires, None);
    assert_eq!(metadata.redirect, None);
}