Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Experiment ac rust/v4 #9902

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions rust/Cargo.toml.in
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ brotli = "~3.4.0"
hkdf = "~0.12.3"
aes = "~0.7.5"
aes-gcm = "~0.9.4"
aho-corasick = "1.1.2"

der-parser = "~8.2.0"
kerberos-parser = { version = "~0.7.1", default_features = false }
Expand Down
157 changes: 157 additions & 0 deletions rust/src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,160 @@ pub unsafe extern "C" fn rs_to_hex_sep(
// overwrites last separator with final null char
oslice[3 * islice.len() - 1] = 0;
}

use aho_corasick::AhoCorasick;
use std::collections::HashMap;

/// Settings stored for a single pattern, next to a copy of the pattern bytes.
#[derive(Debug, Clone)]
struct AhoCorasickPatternData {
    /// The pattern bytes exactly as they were registered.
    pat: Vec<u8>,
    /// Ids that are appended to the result list when this pattern matches.
    sids: Vec<u32>,
    /// Whether this pattern is to be matched case insensitively.
    ci: bool,
    /// A match starting before this position is rejected.
    offset: u16,
    /// When non-zero, a match ending after this position is rejected.
    depth: u16,
}

impl AhoCorasickPatternData {
    /// Bundle a pattern together with its settings.
    fn new(pat: Vec<u8>, ci: bool, sids: Vec<u32>, offset: u16, depth: u16) -> Self {
        AhoCorasickPatternData {
            depth,
            offset,
            ci,
            sids,
            pat,
        }
    }
}

/// Collects patterns and their settings until the matcher state is built
/// from them.
#[derive(Default)]
pub struct AhoCorasickStateBuilder {
    /// Vector of patterns. The final pattern id will depend on the position in this
    /// vector, starting at 0.
    patterns: Vec<Vec<u8>>,
    /// Id that will be handed to the next pattern that is added.
    pattern_id: u32,
    /// Hash of patterns with their settings, keyed by pattern id. Will be copied
    /// to `AhoCorasickState` in the prepare step.
    pattern_data: HashMap<u32, AhoCorasickPatternData>,
    /// Track if we have case insensitive patterns. If so, we need to tell AC and
    /// do a bit more work in validation.
    has_ci: bool,
}

impl AhoCorasickStateBuilder {
    /// Create an empty builder.
    fn new() -> Self {
        Self::default()
    }

    /// Register a pattern and its settings under the next free pattern id.
    ///
    /// The id equals the pattern's index in `patterns`, since both advance by
    /// one per call.
    fn add_pattern(&mut self, pat: Vec<u8>, ci: bool, sids: Vec<u32>, offset: u16, depth: u16) {
        let pattern_id = self.pattern_id;
        self.pattern_id += 1;
        self.has_ci |= ci;

        // The pattern list needs its own copy of the bytes; the original
        // buffer is then moved into the settings instead of being cloned again.
        self.patterns.push(pat.clone());
        self.pattern_data.insert(
            pattern_id,
            AhoCorasickPatternData::new(pat, ci, sids, offset, depth),
        );
    }
}

#[no_mangle]
pub extern "C" fn rs_mpm_acrs_new_builder() -> *mut std::os::raw::c_void {
let state = AhoCorasickStateBuilder::new();
let boxed = Box::new(state);
return Box::into_raw(boxed) as *mut _;
}

#[no_mangle]
pub extern "C" fn rs_mpm_acrs_free_builder(state: *mut std::os::raw::c_void) {
let mut _state = unsafe { Box::from_raw(state as *mut AhoCorasickStateBuilder) };
victorjulien marked this conversation as resolved.
Show resolved Hide resolved
}

/// Copy a pattern and its ids out of caller memory and register them with
/// the builder.
///
/// * `pat` / `pat_len`: the pattern bytes.
/// * `sids` / `sids_len`: ids stored with the pattern; a NULL pointer or a
///   zero length registers the pattern with an empty id list.
/// * `ci`, `offset`, `depth`: per-pattern settings, stored unchanged.
///
/// Returns 0 on success and -1 if `pat` is NULL.
///
/// # Safety
///
/// `pat` must be valid for reads of `pat_len` bytes, and a non-NULL `sids`
/// must be valid for reads of `sids_len` `u32` values.
#[no_mangle]
pub unsafe extern "C" fn rs_mpm_acrs_add_pattern(state: &mut AhoCorasickStateBuilder,
    pat: *mut u8, pat_len: u16, sids: *mut u32, sids_len: u32, ci: bool, offset: u16, depth: u16) -> i32 {
    if pat.is_null() {
        return -1;
    }

    // NOTE(review): a reviewer pointed out that the inner `unsafe` blocks are
    // not needed because the function itself is `unsafe`. They are kept on
    // purpose so that only the raw pointer accesses are marked and the rest
    // of the body reads as safe code.

    // SAFETY: `pat` is non-NULL and the caller guarantees it is readable for
    // `pat_len` bytes.
    let p = unsafe { build_slice!(pat, pat_len as usize) };
    let s: &[u32] = if sids.is_null() || sids_len == 0 {
        &[]
    } else {
        // SAFETY: `sids` is non-NULL and the caller guarantees it is readable
        // for `sids_len` elements.
        unsafe { build_slice!(sids, sids_len as usize) }
    };

    // Both slices are copied, so the builder does not keep references into
    // caller memory.
    state.add_pattern(p.to_vec(), ci, s.to_vec(), offset, depth);
    0
}

pub struct AhoCorasickState {
pattern_cnt: u32,
pattern_data: HashMap<u32,AhoCorasickPatternData>,
has_ci: bool,
ac: AhoCorasick,
}

impl AhoCorasickState {
/// build the AC state from the builder
fn prepare(builder: &AhoCorasickStateBuilder) -> Self {
let ac = AhoCorasick::builder()
.ascii_case_insensitive(builder.has_ci)
.build(&builder.patterns)
.unwrap();
Self { ac, has_ci: builder.has_ci, pattern_cnt: builder.pattern_id, pattern_data: builder.pattern_data.clone() }
}

/// Search for the patterns. Returns number of matches.
/// Per pattern found sids are only appended once.
/// TODO review match_cnt logic. In general it's tuned to the unittests now, but it leads to
victorjulien marked this conversation as resolved.
Show resolved Hide resolved
/// some inefficienty. Could make sense to check the bool array first instead of doing the
/// hash map lookup.
fn search(&self, haystack: &[u8], sids: &mut Vec<u32>) -> u32 {
SCLogDebug!("haystack {:?}: looking for {} patterns. Has CI {}", haystack, self.pattern_cnt, self.has_ci);
let mut match_cnt = 0;
// array of bools for patterns we found
let mut matches = vec![false; self.pattern_cnt as usize];
for mat in self.ac.find_overlapping_iter(haystack) {
let pat_id = mat.pattern();
/* bail if we found this pattern before */
if matches[pat_id] {
SCLogDebug!("pattern {:?} already found", pat_id);
continue;
}

let pattern = self.pattern_data.get(&pat_id.as_u32()).unwrap();
if self.has_ci && !pattern.ci {
let found = &haystack[mat.start()..mat.end()];
if found != pattern.pat {
SCLogDebug!("pattern {:?} failed: not an exact match", pat_id);
continue;
}
}

/* enforce offset and depth */
if pattern.offset as usize > mat.start() {
SCLogDebug!("pattern {:?} failed: found before offset", pat_id);
continue;
}
if pattern.depth != 0 && mat.end() > pattern.depth as usize {
SCLogDebug!("pattern {:?} failed: after depth", pat_id);
continue;
}
matches[pat_id] = true;
SCLogDebug!("match! {:?}: {:?}", pat_id, pattern);
sids.append(&mut pattern.sids.clone());
match_cnt += 1;
}
return match_cnt;
}
}

#[no_mangle]
pub extern "C" fn rs_mpm_acrs_prepare_builder(builder: &AhoCorasickStateBuilder) -> *mut std::os::raw::c_void {
let state = AhoCorasickState::prepare(builder);
let boxed = Box::new(state);
return Box::into_raw(boxed) as *mut _;
}
/// Release a state that was handed out by `rs_mpm_acrs_prepare_builder`.
///
/// A NULL `state` is accepted and ignored. Any other value must be a pointer
/// obtained from `rs_mpm_acrs_prepare_builder` that has not been freed yet.
#[no_mangle]
pub extern "C" fn rs_mpm_acrs_state_free(state: *mut std::os::raw::c_void) {
    if state.is_null() {
        return;
    }
    // SAFETY: the pointer is non-NULL and, per the contract above, originates
    // from `Box::into_raw` on a `Box<AhoCorasickState>`, so rebuilding the box
    // and dropping it releases the allocation exactly once.
    drop(unsafe { Box::from_raw(state as *mut AhoCorasickState) });
}

/// Search `data` for the patterns in `state` and report the collected ids
/// through `cb`.
///
/// `cb` is called at most once, and only if at least one id was collected. It
/// receives `cbdata`, a pointer to the ids and the number of ids. The id
/// buffer is owned by this function, so the pointer must not be used after
/// the callback returns.
///
/// A NULL `data` pointer or a `data_len` of 0 is searched as an empty buffer.
///
/// Returns the match count reported by the search.
///
/// # Safety
///
/// A non-NULL `data` must be valid for reads of `data_len` bytes, and `cb`
/// must be safe to call with `cbdata`.
#[no_mangle]
pub unsafe extern "C" fn rs_mpm_acrs_search(state: &AhoCorasickState, data: *const u8, data_len: u32,
    cb: unsafe extern "C" fn(*mut std::os::raw::c_void, *const u32, u32),
    cbdata: *mut std::os::raw::c_void) -> u32
{
    let haystack: &[u8] = if data.is_null() || data_len == 0 {
        &[]
    } else {
        // SAFETY: `data` is non-NULL and the caller guarantees it is readable
        // for `data_len` bytes.
        unsafe { build_slice!(data, data_len as usize) }
    };

    let mut sids: Vec<u32> = Vec::new();
    let matches = state.search(haystack, &mut sids);
    if !sids.is_empty() {
        // SAFETY: `sids` outlives the call and the pointer/length pair
        // describes exactly its initialized elements; the caller vouches for
        // `cb` and `cbdata`.
        unsafe { cb(cbdata, sids.as_ptr(), sids.len() as u32) };
    }
    matches
}
2 changes: 2 additions & 0 deletions src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,7 @@ noinst_HEADERS = \
util-mpm-ac-bs.h \
util-mpm-ac.h \
util-mpm-ac-ks.h \
util-mpm-ac-rs.h \
util-mpm.h \
util-mpm-hs.h \
util-napatech.h \
Expand Down Expand Up @@ -1181,6 +1182,7 @@ libsuricata_c_a_SOURCES = \
util-mpm-ac.c \
util-mpm-ac-ks.c \
util-mpm-ac-ks-small.c \
util-mpm-ac-rs.c \
util-mpm.c \
util-mpm-hs.c \
util-napatech.c \
Expand Down
2 changes: 2 additions & 0 deletions src/util-mpm-ac-bs.c
Original file line number Diff line number Diff line change
Expand Up @@ -906,6 +906,8 @@ int SCACBSPreparePatterns(MpmCtx *mpm_ctx)
/* ACPatternList now owns this memory */
ctx->pid_pat_list[ctx->parray[i]->id].sids_size = ctx->parray[i]->sids_size;
ctx->pid_pat_list[ctx->parray[i]->id].sids = ctx->parray[i]->sids;
ctx->parray[i]->sids = NULL;
ctx->parray[i]->sids_size = 0;
}

/* prepare the state table required by AC */
Expand Down
Loading
Loading