From 6c0fe90909df50a9030262c22c8cddc8a26cb528 Mon Sep 17 00:00:00 2001 From: epi <43392618+epi052@users.noreply.github.com> Date: Fri, 3 Nov 2023 06:28:09 -0400 Subject: [PATCH] fixed collect backups filtering (#1016) * fixed collect backups filtering and clippy * added test for filtered backups --- src/event_handlers/outputs.rs | 15 ++++++ src/nlp/document.rs | 2 +- src/nlp/term.rs | 7 +-- src/scanner/ferox_scanner.rs | 93 +++++++++++++++++------------------ tests/test_filters.rs | 43 ++++++++++++++++ 5 files changed, 105 insertions(+), 55 deletions(-) diff --git a/src/event_handlers/outputs.rs b/src/event_handlers/outputs.rs index a93f5108..36a5e4f7 100644 --- a/src/event_handlers/outputs.rs +++ b/src/event_handlers/outputs.rs @@ -328,6 +328,21 @@ impl TermOutHandler { ) .await; + let Some(handles) = self.handles.as_ref() else { + // shouldn't ever happen, but we'll log and return early if it does + log::error!("handles were unexpectedly None, this shouldn't happen"); + return Ok(()); + }; + + if handles + .filters + .data + .should_filter_response(&ferox_response, tx_stats.clone()) + { + // response was filtered for one reason or another, don't process it + continue; + } + self.process_response( tx_stats.clone(), Box::new(ferox_response), diff --git a/src/nlp/document.rs b/src/nlp/document.rs index daeb7859..4072e89b 100644 --- a/src/nlp/document.rs +++ b/src/nlp/document.rs @@ -35,7 +35,7 @@ impl Document { fn add_term(&mut self, word: &str) { let term = Term::new(word); - let metadata = self.terms.entry(term).or_insert_with(TermMetaData::new); + let metadata = self.terms.entry(term).or_default(); *metadata.count_mut() += 1; } diff --git a/src/nlp/term.rs b/src/nlp/term.rs index 86b95919..007b8cee 100644 --- a/src/nlp/term.rs +++ b/src/nlp/term.rs @@ -35,11 +35,6 @@ pub(super) struct TermMetaData { } impl TermMetaData { - /// create a new metadata container - pub(super) fn new() -> Self { - Self::default() - } - /// number of times a `Term` has appeared in any 
`Document` within the corpus pub(super) fn document_frequency(&self) -> usize { self.term_frequencies().len() @@ -90,7 +85,7 @@ mod tests { #[test] /// test accessors for correctness fn nlp_term_metadata_accessor_test() { - let mut metadata = TermMetaData::new(); + let mut metadata = TermMetaData::default(); *metadata.count_mut() += 1; assert_eq!(metadata.count(), 1); diff --git a/src/scanner/ferox_scanner.rs b/src/scanner/ferox_scanner.rs index 6404708f..df00d471 100644 --- a/src/scanner/ferox_scanner.rs +++ b/src/scanner/ferox_scanner.rs @@ -251,60 +251,57 @@ impl FeroxScanner { // heuristics test block: let test = heuristics::HeuristicTests::new(self.handles.clone()); - if let Ok(dirlist_result) = test.directory_listing(&self.target_url).await { - if dirlist_result.is_some() { - let dirlist_result = dirlist_result.unwrap(); - // at this point, we have a DirListingType, and it's not the None variant - // which means we found directory listing based on the heuristic; now we need - // to process the links that are available if --extract-links was used - - if self.handles.config.extract_links { - let mut extractor = ExtractorBuilder::default() - .response(&dirlist_result.response) - .target(ExtractionTarget::DirectoryListing) - .url(&self.target_url) - .handles(self.handles.clone()) - .build()?; - - let result = extractor.extract_from_dir_listing().await?; - - extraction_tasks.push(extractor.request_links(result).await?); - - log::trace!("exit: scan_url -> Directory listing heuristic"); - - self.handles.stats.send(AddToF64Field( - DirScanTimes, - scan_timer.elapsed().as_secs_f64(), - ))?; - - self.handles.stats.send(SubtractFromUsizeField( - TotalExpected, - progress_bar.length().unwrap_or(0) as usize, - ))?; - } + if let Ok(Some(dirlist_result)) = test.directory_listing(&self.target_url).await { + // at this point, we have a DirListingType, and it's not the None variant + // which means we found directory listing based on the heuristic; now we need + // to process 
the links that are available if --extract-links was used + + if self.handles.config.extract_links { + let mut extractor = ExtractorBuilder::default() + .response(&dirlist_result.response) + .target(ExtractionTarget::DirectoryListing) + .url(&self.target_url) + .handles(self.handles.clone()) + .build()?; + + let result = extractor.extract_from_dir_listing().await?; + + extraction_tasks.push(extractor.request_links(result).await?); + + log::trace!("exit: scan_url -> Directory listing heuristic"); + + self.handles.stats.send(AddToF64Field( + DirScanTimes, + scan_timer.elapsed().as_secs_f64(), + ))?; + + self.handles.stats.send(SubtractFromUsizeField( + TotalExpected, + progress_bar.length().unwrap_or(0) as usize, + ))?; + } - let mut message = format!("=> {}", style("Directory listing").blue().bright()); + let mut message = format!("=> {}", style("Directory listing").blue().bright()); - if !self.handles.config.extract_links { - write!( - message, - " (remove {} to scan)", - style("--dont-extract-links").bright().yellow() - )?; - } + if !self.handles.config.extract_links { + write!( + message, + " (remove {} to scan)", + style("--dont-extract-links").bright().yellow() + )?; + } - if !self.handles.config.force_recursion { - for handle in extraction_tasks.into_iter().flatten() { - _ = handle.await; - } + if !self.handles.config.force_recursion { + for handle in extraction_tasks.into_iter().flatten() { + _ = handle.await; + } - progress_bar.reset_eta(); - progress_bar.finish_with_message(message); + progress_bar.reset_eta(); + progress_bar.finish_with_message(message); - ferox_scan.finish()?; + ferox_scan.finish()?; - return Ok(()); // nothing left to do if we found a dir listing - } + return Ok(()); // nothing left to do if we found a dir listing } } diff --git a/tests/test_filters.rs b/tests/test_filters.rs index 235e2683..154b2657 100644 --- a/tests/test_filters.rs +++ b/tests/test_filters.rs @@ -247,3 +247,46 @@ fn filters_similar_should_filter_response() { 
assert_eq!(not_similar.hits(), 1); teardown_tmp_directory(tmp_dir); } + +#[test] +/// when using --collect-backups, should only see results in output +/// when the response shouldn't be otherwise filtered +fn collect_backups_should_be_filtered() { + let srv = MockServer::start(); + let (tmp_dir, file) = setup_tmp_directory(&["LICENSE".to_string()], "wordlist").unwrap(); + + let mock = srv.mock(|when: httpmock::When, then| { + when.method(GET).path("/LICENSE"); + then.status(200).body("this is a test"); + }); + + let mock_two = srv.mock(|when, then| { + when.method(GET).path("/LICENSE.bak"); + then.status(201) + .body("im a backup file, but filtered out because im not 200"); + }); + + let cmd = Command::cargo_bin("feroxbuster") + .unwrap() + .arg("--url") + .arg(srv.url("/")) + .arg("--wordlist") + .arg(file.as_os_str()) + .arg("--status-codes") + .arg("200") + .arg("--collect-backups") + .unwrap(); + + cmd.assert().success().stdout( + // negate each unwanted match on its own: a trailing `.not()` in the chain + // would invert everything built up to that point, not just the last predicate + predicate::str::contains("/LICENSE") + .and(predicate::str::contains("200")) + .and(predicate::str::contains("/LICENSE.bak").not()) + .and(predicate::str::contains("201").not()), + ); + + assert_eq!(mock.hits(), 1); + assert_eq!(mock_two.hits(), 1); + teardown_tmp_directory(tmp_dir); +}