From 0b495e74fc0e8ebda9beddaa51058ea8440d5b35 Mon Sep 17 00:00:00 2001 From: Jun Kurihara Date: Thu, 21 Nov 2024 18:19:03 +0900 Subject: [PATCH] use the separated crate match-domain for domain block --- proxy-lib/Cargo.toml | 2 +- proxy-lib/src/doh_client/error.rs | 3 + .../doh_client/manipulation/domain_block.rs | 113 ++---------------- .../doh_client/manipulation/regexp_vals.rs | 2 +- 4 files changed, 13 insertions(+), 107 deletions(-) diff --git a/proxy-lib/Cargo.toml b/proxy-lib/Cargo.toml index 8b4713b..55022cb 100644 --- a/proxy-lib/Cargo.toml +++ b/proxy-lib/Cargo.toml @@ -43,7 +43,7 @@ bytes = "1.8.0" hickory-proto = { version = "0.24.1", default-features = false } data-encoding = "2.6.0" hashlink = "0.9.1" -cedarwood = "0.4.6" +match-domain = "0.1.1" regex = "1.11.1" # network diff --git a/proxy-lib/src/doh_client/error.rs b/proxy-lib/src/doh_client/error.rs index a1a9118..7e3e1f3 100644 --- a/proxy-lib/src/doh_client/error.rs +++ b/proxy-lib/src/doh_client/error.rs @@ -47,6 +47,9 @@ pub enum DohClientError { #[error("Regex error: {0}")] RegexError(#[from] regex::Error), + #[error("Domain matcher error: {0}")] + DomainMatcherError(#[from] match_domain::Error), + #[error(transparent)] Other(#[from] anyhow::Error), } diff --git a/proxy-lib/src/doh_client/manipulation/domain_block.rs b/proxy-lib/src/doh_client/manipulation/domain_block.rs index 3e43b02..1250316 100644 --- a/proxy-lib/src/doh_client/manipulation/domain_block.rs +++ b/proxy-lib/src/doh_client/manipulation/domain_block.rs @@ -3,7 +3,6 @@ use super::{ dns_message::{build_response_nx, QueryKey}, error::DohClientError, }, - regexp_vals::*, QueryManipulation, QueryManipulationResult, }; use crate::{ @@ -13,9 +12,8 @@ use crate::{ }; use anyhow::bail; use async_trait::async_trait; -use cedarwood::Cedar; use hickory_proto::{op::Message, rr}; -use regex::Regex; +use match_domain::DomainMatchingRule; #[async_trait] impl QueryManipulation for DomainBlockRule { @@ -49,11 +47,10 @@ fn build_response_block(query_message: &Message) -> Message { } #[derive(Debug, Clone)] +/// DomainBlockRule is a query manipulation rule that blocks queries based on domain matching pub struct DomainBlockRule { - prefix_cedar: Cedar, - suffix_cedar: Cedar, - prefix_dict: Vec, - suffix_dict: Vec, + /// inner domain matching rule + inner: DomainMatchingRule, } impl TryFrom<&QueryManipulationConfig> for Option { @@ -62,91 +59,13 @@ impl TryFrom<&QueryManipulationConfig> for Option { let Some(config_domain_block) = &config.domain_block else { return Ok(None); }; - - let start_with_star = Regex::new(r"^\*\..+").unwrap(); - let end_with_star = Regex::new(r".+\.\*$").unwrap(); - // TODO: currently either one of prefix or suffix match with '*' is supported - let re = Regex::new(&format!("{}{}{}", r"^", REGEXP_DOMAIN_OR_PREFIX, r"$")).unwrap(); - let dict: Vec = config_domain_block - .iter() - .map(|d| if start_with_star.is_match(d) { &d[2..] } else { d }) - .filter(|x| re.is_match(x) || (x.split('.').count() == 1)) - .map(|y| y.to_ascii_lowercase()) - .collect(); - let prefix_dict: Vec = dict - .iter() - .filter(|d| end_with_star.is_match(d)) - .map(|d| d[..d.len() - 2].to_string()) - .collect(); - let suffix_dict: Vec = dict - .iter() - .filter(|d| !end_with_star.is_match(d)) - .map(|d| reverse_string(d)) - .collect(); - - let prefix_kv: Vec<(&str, i32)> = prefix_dict - .iter() - .map(AsRef::as_ref) - .enumerate() - .map(|(k, s)| (s, k as i32)) - .collect(); - let mut prefix_cedar = Cedar::new(); - prefix_cedar.build(&prefix_kv); - - let suffix_kv: Vec<(&str, i32)> = suffix_dict - .iter() - .map(AsRef::as_ref) - .enumerate() - .map(|(k, s)| (s, k as i32)) - .collect(); - let mut suffix_cedar = Cedar::new(); - suffix_cedar.build(&suffix_kv); - - Ok(Some(DomainBlockRule { - prefix_cedar, - suffix_cedar, - prefix_dict, - suffix_dict, - })) + let inner = DomainMatchingRule::try_from(config_domain_block.as_slice())?; + Ok(Some(DomainBlockRule { inner })) } } impl DomainBlockRule { - fn find_suffix_match(&self, query_domain: &str) -> bool { - let rev_nn = reverse_string(query_domain); - let matched_items = self - .suffix_cedar - .common_prefix_iter(&rev_nn) - .map(|(x, _)| self.suffix_dict[x as usize].clone()); - - let mut matched_as_domain = matched_items.filter(|found| { - if found.len() == rev_nn.len() { - true - } else if let Some(nth) = rev_nn.chars().nth(found.chars().count()) { - nth.to_string() == "." - } else { - false - } - }); - matched_as_domain.next().is_some() - } - - fn find_prefix_match(&self, query_domain: &str) -> bool { - let matched_items = self - .prefix_cedar - .common_prefix_iter(query_domain) - .map(|(x, _)| self.prefix_dict[x as usize].clone()); - - let mut matched_as_domain = matched_items.filter(|found| { - if let Some(nth) = query_domain.chars().nth(found.chars().count()) { - nth.to_string() == "." - } else { - false - } - }); - matched_as_domain.next().is_some() - } - + /// Check if the query key is in blocklist pub fn in_blocklist(&self, q_key: &QueryKey) -> anyhow::Result { // remove final dot let mut nn = q_key.clone().query_name.to_ascii_lowercase(); @@ -161,26 +80,10 @@ impl DomainBlockRule { } } - if self.find_suffix_match(&nn) { - debug!("[with cw] suffix/exact match found: {}", nn); - return Ok(true); - } - - if self.find_prefix_match(&nn) { - debug!("[with cw] prefix match found: {}", nn); - return Ok(true); - } - - // TODO: other matching patterns - - Ok(false) + Ok(self.inner.is_matched(&nn)) } } -fn reverse_string(text: &str) -> String { - text.chars().rev().collect::() -} - #[cfg(test)] mod tests { use super::*; diff --git a/proxy-lib/src/doh_client/manipulation/regexp_vals.rs b/proxy-lib/src/doh_client/manipulation/regexp_vals.rs index a554515..f4a46e8 100644 --- a/proxy-lib/src/doh_client/manipulation/regexp_vals.rs +++ b/proxy-lib/src/doh_client/manipulation/regexp_vals.rs @@ -1,5 +1,5 @@ pub const REGEXP_DOMAIN: &str = r"([a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9]*\.)+([a-zA-Z]{2,})"; -pub const REGEXP_DOMAIN_OR_PREFIX: &str = r"^([a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9]*\.)+([a-zA-Z]{2,}|\*)"; +// pub const REGEXP_DOMAIN_OR_PREFIX: &str = r"^([a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9]*\.)+([a-zA-Z]{2,}|\*)"; pub const REGEXP_IPV4: &str = r"((25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])\.){3}(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])"; pub const REGEXP_IPV6: &str = r"(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))";