From 7281d3e5156d58d0df78c0510d82b7973574aa84 Mon Sep 17 00:00:00 2001 From: shelld3v <59408894+shelld3v@users.noreply.github.com> Date: Sat, 9 Nov 2024 02:19:21 +0700 Subject: [PATCH] Add option to filter duplicate results and remove --remove-extensions --- CHANGELOG.md | 1 + config.ini | 1 + lib/core/data.py | 2 +- lib/core/dictionary.py | 3 --- lib/core/fuzzer.py | 20 ++++++++++++++++++-- lib/core/options.py | 7 +++---- lib/parse/cmdline.py | 14 ++++++++------ 7 files changed, 32 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 89627f41a..3a6332da3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ - Load targets from a Nmap XML report - Added --async option to enable asynchronous mode (use coroutines instead of threads) - Added option to disable CLI output entirely +- Option to detect and filter identical results ## [0.4.3] - October 2nd, 2022 - Automatically detect the URI scheme (`http` or `https`) if no scheme is provided diff --git a/config.ini b/config.ini index d2ebc13d6..db59de78b 100644 --- a/config.ini +++ b/config.ini @@ -14,6 +14,7 @@ exclude-subdirs = %%ff/,.;/,..;/,;/,./,../,%%2e/,%%2e%%2e/ random-user-agents = False max-time = 0 exit-on-error = False +#filter-threshold = 10 #subdirs = /,api/ #include-status = 200-299,401 #exclude-status = 400,500-999 diff --git a/lib/core/data.py b/lib/core/data.py index 4534e44e1..514ee6db7 100755 --- a/lib/core/data.py +++ b/lib/core/data.py @@ -35,7 +35,6 @@ "force_extensions": False, "overwrite_extensions": False, "exclude_extensions": (), - "remove_extensions": None, "prefixes": (), "suffixes": (), "uppercase": False, @@ -47,6 +46,7 @@ "force_recursive": False, "recursion_depth": 0, "recursion_status_codes": set(), + "filter_threshold": 0, "subdirs": [], "exclude_subdirs": [], "include_status_codes": set(), diff --git a/lib/core/dictionary.py b/lib/core/dictionary.py index d4d7dd8d1..42ba941af 100755 --- a/lib/core/dictionary.py +++ b/lib/core/dictionary.py @@ -122,9 
+122,6 @@ def generate(self, files: list[str] = [], is_blacklist: bool = False) -> list[st # Removing leading "/" to work with prefixes later line = lstrip_once(line, "/") - if options["remove_extensions"]: - line = line.split(".")[0] - if not self.is_valid(line): continue diff --git a/lib/core/fuzzer.py b/lib/core/fuzzer.py index 4eec7821d..dd601223e 100755 --- a/lib/core/fuzzer.py +++ b/lib/core/fuzzer.py @@ -53,6 +53,7 @@ def __init__( self._requester = requester self._dictionary = dictionary self._base_path: str = "" + self._hashes: dict = {} self.exc: Exception | None = None self.match_callbacks = match_callbacks self.not_found_callbacks = not_found_callbacks @@ -82,8 +83,7 @@ def get_scanners_for(self, path: str) -> Generator[BaseScanner, None, None]: for scanner in self.scanners["default"].values(): yield scanner - @staticmethod - def is_excluded(resp: BaseResponse) -> bool: + def is_excluded(self, resp: BaseResponse) -> bool: """Validate the response by different filters""" if resp.status in options["exclude_status_codes"]: @@ -128,6 +128,12 @@ def is_excluded(resp: BaseResponse) -> bool: ): return True + if ( + options["filter_threshold"] + and self._hashes.get(hash(resp), 0) >= options["filter_threshold"] + ): + return True + return False @@ -246,6 +252,11 @@ def scan(self, path: str) -> None: callback(response) return + if options["filter_threshold"]: + hash_ = hash(response) + self._hashes.setdefault(hash_, 0) + self._hashes[hash_] += 1 + try: for callback in self.match_callbacks: callback(response) @@ -391,6 +402,11 @@ async def scan(self, path: str) -> None: callback(response) return + if options["filter_threshold"]: + hash_ = hash(response) + self._hashes.setdefault(hash_, 0) + self._hashes[hash_] += 1 + try: for callback in self.match_callbacks: callback(response) diff --git a/lib/core/options.py b/lib/core/options.py index 75f18d085..c8e7d57ce 100755 --- a/lib/core/options.py +++ b/lib/core/options.py @@ -70,7 +70,7 @@ def parse_options() -> 
dict[str, Any]: ) ) - if not opt.extensions and not opt.remove_extensions: + if not opt.extensions: print("WARNING: No extension was specified!") if not opt.wordlists: @@ -151,9 +151,7 @@ def parse_options() -> dict[str, Any]: ] opt.exclude_sizes = {size.strip().upper() for size in opt.exclude_sizes.split(",")} - if opt.remove_extensions: - opt.extensions = ("",) - elif opt.extensions == "*": + if opt.extensions == "*": opt.extensions = COMMON_EXTENSIONS elif opt.extensions == "CHANGELOG.md": print( @@ -271,6 +269,7 @@ def merge_config(opt: Values) -> Values: # General opt.thread_count = opt.thread_count or config.safe_getint("general", "threads", 25) opt.async_mode = opt.async_mode or config.safe_getboolean("general", "async") + opt.filter_threshold = opt.filter_threshold or config.safe_getint("general", "filter-threshold", 0) opt.include_status_codes = opt.include_status_codes or config.safe_get( "general", "include-status" ) diff --git a/lib/parse/cmdline.py b/lib/parse/cmdline.py index 194a4029a..02bd3dc54 100755 --- a/lib/parse/cmdline.py +++ b/lib/parse/cmdline.py @@ -116,12 +116,6 @@ def parse_arguments() -> Values: metavar="EXTENSIONS", help="Exclude extension list, separated by commas (e.g. asp,jsp)", ) - dictionary.add_option( - "--remove-extensions", - action="store_true", - dest="remove_extensions", - help="Remove extensions in all paths (e.g. admin.php -> admin)", - ) dictionary.add_option( "--prefixes", action="store", @@ -209,6 +203,14 @@ def parse_arguments() -> Values: metavar="CODES", help="Valid status codes to perform recursive scan, support ranges (separated by commas)", ) + general.add_option( + "--filter-threshold", + action="store", + type="int", + dest="filter_threshold", + metavar="THRESHOLD", + help="Maximum number of results with duplicate responses before getting filtered out", + ) general.add_option( "--subdirs", action="store",