dsxs.py (forked from stamparm/DSXS)

#!/usr/bin/python3
import concurrent.futures
import optparse, random, re, string, urllib.parse, urllib.request  # Python 3 required

NAME, VERSION, AUTHOR, LICENSE = "Damn Small XSS Scanner (DSXS) EDITED @nishant57", "0.3c", "Miroslav Stampar (@stamparm)", "Public domain (FREE)"

SMALLER_CHAR_POOL = ('<', '>')                 # characters used for XSS tampering of parameter values (smaller set - for avoiding possible SQLi errors)
LARGER_CHAR_POOL = ('\'', '"', '>', '<', ';')  # characters used for XSS tampering of parameter values (larger set)
GET, POST = "GET", "POST"                      # enumerator-like values used for marking current phase
PREFIX_SUFFIX_LENGTH = 5                       # length of random prefix/suffix used in XSS tampering
COOKIE, UA, REFERER = "Cookie", "User-Agent", "Referer"  # optional HTTP header names
TIMEOUT = 30                                   # connection timeout in seconds
DOM_FILTER_REGEX = r"(?s)<!--.*?-->|\bescape\([^)]+\)|\([^)]+==[^(]+\)|\"[^\"]+\"|'[^']+'"  # filtering regex used before DOM XSS search

REGULAR_PATTERNS = (  # each (regular pattern) item consists of (r"context regex", (prerequisite unfiltered characters), "info text", r"content removal regex")
    (r"\A[^<>]*%(chars)s[^<>]*\Z", ('<', '>'), "\".xss.\", pure text response, %(filtering)s filtering", None),
    (r"<!--[^>]*%(chars)s|%(chars)s[^<]*-->", ('<', '>'), "\"<!--.'.xss.'.-->\", inside the comment, %(filtering)s filtering", None),
    (r"(?s)<script[^>]*>[^<]*?'[^<']*%(chars)s|%(chars)s[^<']*'[^<]*</script>", ('\'', ';'), "\"<script>.'.xss.'.</script>\", enclosed by <script> tags, inside single-quotes, %(filtering)s filtering", r"\\'|{[^\n]+}"),
    (r'(?s)<script[^>]*>[^<]*?"[^<"]*%(chars)s|%(chars)s[^<"]*"[^<]*</script>', ('"', ';'), "'<script>.\".xss.\".</script>', enclosed by <script> tags, inside double-quotes, %(filtering)s filtering", r'\\"|{[^\n]+}'),
    (r"(?s)<script[^>]*>[^<]*?%(chars)s|%(chars)s[^<]*</script>", (';',), "\"<script>.xss.</script>\", enclosed by <script> tags, %(filtering)s filtering", r"&(#\d+|[a-z]+);|'[^'\s]+'|\"[^\"\s]+\"|{[^\n]+}"),
    (r">[^<]*%(chars)s[^<]*(<|\Z)", ('<', '>'), "\">.xss.<\", outside of tags, %(filtering)s filtering", r"(?s)<script.+?</script>|<!--.*?-->"),
    (r"<[^>]*=\s*'[^>']*%(chars)s[^>']*'[^>]*>", ('\'',), "\"<.'.xss.'.>\", inside the tag, inside single-quotes, %(filtering)s filtering", r"(?s)<script.+?</script>|<!--.*?-->|\\"),
    (r'<[^>]*=\s*"[^>"]*%(chars)s[^>"]*"[^>]*>', ('"',), "'<.\".xss.\".>', inside the tag, inside double-quotes, %(filtering)s filtering", r"(?s)<script.+?</script>|<!--.*?-->|\\"),
    (r"<[^>]*%(chars)s[^>]*>", (), "\"<.xss.>\", inside the tag, outside of quotes, %(filtering)s filtering", r"(?s)<script.+?</script>|<!--.*?-->|=\s*'[^']*'|=\s*\"[^\"]*\""),
)
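
# Illustrative sketch of how a pattern entry is applied (mirrors the matching loop
# in scan_page() below): the "%(chars)s" placeholder is filled with the escaped
# reflected sample before searching, e.g. for the "pure text response" entry:
#
#   regex, condition, info, removal = REGULAR_PATTERNS[0]
#   re.search(regex % {"chars": re.escape("abcde<>fghij")}, "abcde<>fghij")  # match -> reflection in plain text context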

DOM_PATTERNS = (  # each (dom pattern) item consists of r"recognition regex"
    r"(?s)<script[^>]*>[^<]*?(var|\n)\s*(\w+)\s*=[^;]*(document\.(location|URL|documentURI)|location\.(href|search)|window\.location)[^;]*;[^<]*(document\.write(ln)?\(|\.innerHTML\s*=|eval\(|setTimeout\(|setInterval\(|location\.(replace|assign)\(|setAttribute\()[^;]*\2.*?</script>",
    r"(?s)<script[^>]*>[^<]*?(document\.write\(|\.innerHTML\s*=|eval\(|setTimeout\(|setInterval\(|location\.(replace|assign)\(|setAttribute\()[^;]*(document\.(location|URL|documentURI)|location\.(href|search)|window\.location).*?</script>",
)
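
# A hypothetical page snippet that the first DOM pattern would flag: a variable is
# assigned from a location source and later reaches a sink like document.write():
#
#   <script>var loc = document.location.href; document.write(loc);</script>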

_headers = {}  # used for storing dictionary with optional header values

# Function to check if the content type is binary or JSON
def is_binary_or_json_content(content_type):
    if content_type:
        content_type = content_type.split(';')[0].strip()  # get the main type, ignoring parameters (e.g. "; charset=utf-8")
        binary_mime_types = [
            "application/pdf", "application/zip", "application/octet-stream",
            "application/x-tar", "application/vnd.rar", "application/gzip",
            "application/epub+zip", "application/x-bzip", "application/x-bzip2",
            "application/x-freearc", "application/x-7z-compressed",
            "text/javascript",
            "image/", "audio/", "video/", "font/"
        ]
        json_mime_types = [
            "application/json", "application/vnd.api+json"
        ]
        return any(content_type.startswith(mime) for mime in binary_mime_types) or \
            content_type in json_mime_types
    return False
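
# Expected behaviour (illustrative; parameters after ';' are ignored):
#   is_binary_or_json_content("application/json; charset=utf-8")  # -> True (JSON)
#   is_binary_or_json_content("image/png")                        # -> True (binary prefix)
#   is_binary_or_json_content("text/html; charset=utf-8")         # -> False (scannable)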

def _retrieve_content(url, data=None):
    try:
        req = urllib.request.Request("".join(url[i].replace(' ', "%20") if i > url.find('?') else url[i] for i in range(len(url))), data.encode("utf8", "ignore") if data else None, _headers)
        response = urllib.request.urlopen(req, timeout=TIMEOUT)
        content_type = response.headers.get('Content-Type', '')
        retval = response.read()
        # check if the response is binary or JSON before attempting to scan it
        if is_binary_or_json_content(content_type):
            print(f"Skipping scan for {url}: response is binary or JSON.")
            return ""
        return retval.decode("utf8", "ignore") if hasattr(retval, "decode") else ""
    except Exception as ex:
        retval = ex.read() if hasattr(ex, "read") else str(ex.args[-1])
        return (retval.decode("utf8", "ignore") if hasattr(retval, "decode") else "") or ""

def _contains(content, chars):
    content = re.sub(r"\\[%s]" % re.escape("".join(chars)), "", content) if chars else content
    return all(char in content for char in chars)
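
# Behaviour sketch (illustrative values): backslash-escaped occurrences are stripped
# first, so only unescaped (i.e. unfiltered) characters count:
#   _contains("ab<cd>ef", ('<', '>'))  # -> True
#   _contains(r"ab\<cd", ('<',))       # -> False (only an escaped '<' is present)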

def scan_page(url, data=None):
    retval, usable = False, False
    url, data = re.sub(r"=(&|\Z)", r"=1\g<1>", url) if url else url, re.sub(r"=(&|\Z)", r"=1\g<1>", data) if data else data
    original = re.sub(DOM_FILTER_REGEX, "", _retrieve_content(url, data))
    dom = next(filter(None, (re.search(_, original) for _ in DOM_PATTERNS)), None)
    if dom:
        print(" (i) %s page itself appears to be XSS vulnerable (DOM)" % url)
        print(" (o) ...%s..." % dom.group(0))
        retval = True
    try:
        for phase in (GET, POST):
            current = url if phase is GET else (data or "")
            for match in re.finditer(r"((\A|[?&])(?P<parameter>[\w\[\]]+)=)(?P<value>[^&#]*)", current):
                found, usable = False, True
                print("* scanning %s parameter '%s'" % (phase, match.group("parameter")))
                prefix, suffix = ("".join(random.sample(string.ascii_lowercase, PREFIX_SUFFIX_LENGTH)) for i in range(2))
                for pool in (LARGER_CHAR_POOL, SMALLER_CHAR_POOL):
                    if not found:
                        tampered = current.replace(match.group(0), "%s%s" % (match.group(0), urllib.parse.quote("%s%s%s%s" % ("'" if pool == LARGER_CHAR_POOL else "", prefix, "".join(random.sample(pool, len(pool))), suffix))))
                        content = (_retrieve_content(tampered, data) if phase is GET else _retrieve_content(url, tampered)).replace("%s%s" % ("'" if pool == LARGER_CHAR_POOL else "", prefix), prefix)
                        for regex, condition, info, content_removal_regex in REGULAR_PATTERNS:
                            filtered = re.sub(content_removal_regex or "", "", content)
                            for sample in re.finditer("%s([^ ]+?)%s" % (prefix, suffix), filtered, re.I):
                                context = re.search(regex % {"chars": re.escape(sample.group(0))}, filtered, re.I)
                                if context and not found and sample.group(1).strip():
                                    if _contains(sample.group(1), condition):
                                        print("%s %s parameter '%s' appears to be XSS vulnerable (%s)" % (tampered, phase, match.group("parameter"), info % dict((("filtering", "no" if all(char in sample.group(1) for char in LARGER_CHAR_POOL) else "some"),))))
                                        found = retval = True
                                        break
        if not usable:
            print(" (x) no usable GET/POST parameters found")
    except KeyboardInterrupt:
        print("\r (x) Ctrl-C pressed")
    return retval
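
# Programmatic usage sketch (hypothetical target; call init_options() below first
# when custom headers or a proxy are needed):
#   init_options()
#   found = scan_page("http://192.168.1.100/page.php?id=1")  # True if anything was reported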

def init_options(proxy=None, cookie=None, ua=None, referer=None):
    global _headers
    _headers = dict(filter(lambda _: _[1], ((COOKIE, cookie), (UA, ua or NAME), (REFERER, referer))))
    urllib.request.install_opener(urllib.request.build_opener(urllib.request.ProxyHandler({'http': proxy})) if proxy else None)
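
# e.g. routing traffic through a local intercepting proxy (all values are examples):
#   init_options(proxy="http://127.0.0.1:8080", cookie="PHPSESSID=abcd1234", ua="Mozilla/5.0")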

if __name__ == "__main__":
    parser = optparse.OptionParser(version=VERSION)
    parser.add_option("-u", "--url", dest="url", help="Target URL (e.g. \"http://www.target.com/page.php?id=1\")")
    parser.add_option("--file", dest="file", help="File containing a list of URLs (one per line)")
    parser.add_option("--data", dest="data", help="POST data (e.g. \"query=test\")")
    parser.add_option("--cookie", dest="cookie", help="HTTP Cookie header value")
    parser.add_option("--user-agent", dest="ua", help="HTTP User-Agent header value")
    parser.add_option("--referer", dest="referer", help="HTTP Referer header value")
    parser.add_option("--proxy", dest="proxy", help="HTTP proxy address (e.g. \"http://127.0.0.1:8080\")")
    options, _ = parser.parse_args()
    if options.file:  # scan a list of URLs concurrently
        with open(options.file, 'r') as f:
            urls = [line.strip() for line in f if line.strip()]  # skip blank lines
        init_options(options.proxy, options.cookie, options.ua, options.referer)
        with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
            future_to_url = {executor.submit(scan_page, url if url.startswith("http") else f"http://{url}", options.data): url for url in urls}
            for future in concurrent.futures.as_completed(future_to_url):
                url = future_to_url[future]
                try:
                    future.result()  # re-raises any exception from scan_page
                except Exception as e:
                    print(f"An error occurred during the scanning of {url}: {e}")
    elif options.url:
        init_options(options.proxy, options.cookie, options.ua, options.referer)
        scan_page(options.url if options.url.startswith("http") else "http://%s" % options.url, options.data)
    else:
        parser.print_help()
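
# Example invocations (illustrative):
#   python3 dsxs.py -u "http://www.target.com/page.php?id=1"
#   python3 dsxs.py -u "http://www.target.com/page.php" --data "query=test" --cookie "PHPSESSID=abcd1234"
#   python3 dsxs.py --file urls.txt --proxy "http://127.0.0.1:8080"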