This repository has been archived by the owner on Jun 7, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 11
/
strings.py
328 lines (296 loc) · 16.2 KB
/
strings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
# -*- coding: utf-8 -*-
# This file is part of Viper - https://github.com/viper-framework/viper
# See the file 'LICENSE' for copying permission.
import os
import re
import string
from socket import inet_pton, AF_INET6, error as socket_error
from viper.common.abstracts import Module
from viper.common.objects import File, IOBytes
from viper.core.session import __sessions__
from viper.core.database import Database
from viper.core.storage import get_sample_path
# Pre-compiled patterns used by the Strings module to classify extracted strings.
# All of them are applied with .search(), so unless a pattern carries its own
# anchor ('$') it matches anywhere inside the candidate string.

# One or more dot-terminated labels followed by a final label (case-insensitive).
DOMAIN_REGEX = re.compile(r'([a-z0-9][a-z0-9\-]{0,61}[a-z0-9]\.)+[a-z0-9][a-z0-9\-]*[a-z0-9]', re.IGNORECASE)
# Four dot-separated groups of one to three digits. NOTE: this is a loose filter,
# not strict octet validation - a three-digit group only has to start with 1 or 2,
# so values such as 299 are accepted.
IPV4_REGEX = re.compile(r'[1-2]?[0-9]?[0-9]\.[1-2]?[0-9]?[0-9]\.[1-2]?[0-9]?[0-9]\.[1-2]?[0-9]?[0-9]')
# IPv6 candidate filter: colon-separated hex groups, '::' compression, an optional
# embedded dotted-quad tail and an optional '%...' suffix. extract_hosts() confirms
# every hit with inet_pton() before reporting it.
IPV6_REGEX = re.compile(r'((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}'
r'|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9'
r'A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25['
r'0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3'
r'})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|['
r'1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,'
r'4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:'
r'))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-'
r'5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]'
r'{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d'
r'\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7}'
r')|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d'
r'\d|[1-9]?\d)){3}))|:)))(%.+)?', re.IGNORECASE | re.S)
# String ends with the '.pdb' extension (case-insensitive).
PDB_REGEX = re.compile(r'\.pdb$', re.IGNORECASE)
# 'http://' or 'https://' (case-insensitive).
URL_REGEX = re.compile('http(s){0,1}://', re.IGNORECASE)
# 'GET ' or 'POST ' followed by a space (case-sensitive).
GET_POST_REGEX = re.compile('(GET|POST) ')
# The literal text 'Host: ' (case-sensitive).
HOST_REGEX = re.compile('Host: ')
# A Mozilla/curl/Wget/Opera token followed by '/' and, later on, a parenthesised
# section containing a ';' (case-insensitive).
USERAGENT_REGEX = re.compile(r'(Mozilla|curl|Wget|Opera)/.+\(.+\;.+\)', re.IGNORECASE)
# local-part@domain with an alphabetic suffix of at least two letters (case-insensitive).
EMAIL_REGEX = re.compile(r'[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}', re.IGNORECASE)
# A registry hive name (long or abbreviated form) immediately followed by '/' or a
# backslash; '\x5c\x5c' is two backslash characters, i.e. one escaped backslash for
# the regex engine (case-insensitive).
REGKEY_REGEX = re.compile('(HKEY_CLASSES_ROOT|HKEY_CURRENT_USER|HKEY_LOCAL_MACHINE|HKEY_USERS|HKEY_CURRENT_CONFIG|HKCR|HKCU|HKLM|HKU|HKCC)(/|\x5c\x5c)', re.IGNORECASE)
# Path fragments matched without a hive prefix (case-sensitive).
REGKEY2_REGEX = re.compile(r'(CurrentVersion|Software\\Microsoft|Windows NT|Microsoft\\Interface)')
# String ends with word characters, a dot and one of the listed extensions (case-insensitive).
FILE_REGEX = re.compile(r'\w+\.(EXE|DLL|BAT|PS|INI|PIF|SCR|DOC|DOCX|DOCM|PPT|PPTX|PPTS|XLS|XLT|XLSX|XLTX|XLSM|XLTM|ZIP|RAR)$', re.U | re.IGNORECASE)
# Upper-case top-level-domain names. The host and e-mail extractors upper-case the
# text after the last '.' of a candidate string and only accept the candidate when
# that suffix is present in this list.
# NOTE(review): this is a hard-coded snapshot (e.g. 'APP' and 'DEV' are absent);
# confirm whether it needs refreshing.
TLD = [
'AC', 'ACADEMY', 'ACTOR', 'AD', 'AE', 'AERO', 'AF', 'AG', 'AGENCY', 'AI', 'AL', 'AM', 'AN', 'AO', 'AQ', 'AR',
'ARPA', 'AS', 'ASIA', 'AT', 'AU', 'AW', 'AX', 'AZ', 'BA', 'BAR', 'BARGAINS', 'BB', 'BD', 'BE', 'BERLIN', 'BEST',
'BF', 'BG', 'BH', 'BI', 'BID', 'BIKE', 'BIZ', 'BJ', 'BLUE', 'BM', 'BN', 'BO', 'BOUTIQUE', 'BR', 'BS', 'BT',
'BUILD', 'BUILDERS', 'BUZZ', 'BV', 'BW', 'BY', 'BZ', 'CA', 'CAB', 'CAMERA', 'CAMP', 'CARDS', 'CAREERS', 'CAT',
'CATERING', 'CC', 'CD', 'CENTER', 'CEO', 'CF', 'CG', 'CH', 'CHEAP', 'CHRISTMAS', 'CI', 'CK', 'CL', 'CLEANING',
'CLOTHING', 'CLUB', 'CM', 'CN', 'CO', 'CODES', 'COFFEE', 'COM', 'COMMUNITY', 'COMPANY', 'COMPUTER', 'CONDOS',
'CONSTRUCTION', 'CONTRACTORS', 'COOL', 'COOP', 'CR', 'CRUISES', 'CU', 'CV', 'CW', 'CX', 'CY', 'CZ', 'DANCE',
'DATING', 'DE', 'DEMOCRAT', 'DIAMONDS', 'DIRECTORY', 'DJ', 'DK', 'DM', 'DNP', 'DO', 'DOMAINS', 'DZ', 'EC',
'EDU', 'EDUCATION', 'EE', 'EG', 'EMAIL', 'ENTERPRISES', 'EQUIPMENT', 'ER', 'ES', 'ESTATE', 'ET', 'EU', 'EVENTS',
'EXPERT', 'EXPOSED', 'FARM', 'FI', 'FISH', 'FJ', 'FK', 'FLIGHTS', 'FLORIST', 'FM', 'FO', 'FOUNDATION', 'FR',
'FUTBOL', 'GA', 'GALLERY', 'GB', 'GD', 'GE', 'GF', 'GG', 'GH', 'GI', 'GIFT', 'GL', 'GLASS', 'GM', 'GN', 'GOV',
'GP', 'GQ', 'GR', 'GRAPHICS', 'GS', 'GT', 'GU', 'GUITARS', 'GURU', 'GW', 'GY', 'HK', 'HM', 'HN', 'HOLDINGS',
'HOLIDAY', 'HOUSE', 'HR', 'HT', 'HU', 'ID', 'IE', 'IL', 'IM', 'IMMOBILIEN', 'IN', 'INDUSTRIES', 'INFO', 'INK',
'INSTITUTE', 'INT', 'INTERNATIONAL', 'IO', 'IQ', 'IR', 'IS', 'IT', 'JE', 'JM', 'JO', 'JOBS', 'JP', 'KAUFEN',
'KE', 'KG', 'KH', 'KI', 'KIM', 'KITCHEN', 'KIWI', 'KM', 'KN', 'KOELN', 'KP', 'KR', 'KRED', 'KW', 'KY', 'KZ',
'LA', 'LAND', 'LB', 'LC', 'LI', 'LIGHTING', 'LIMO', 'LINK', 'LK', 'LR', 'LS', 'LT', 'LU', 'LUXURY', 'LV', 'LY',
'MA', 'MAISON', 'MANAGEMENT', 'MANGO', 'MARKETING', 'MC', 'MD', 'ME', 'MENU', 'MG', 'MH', 'MIL', 'MK', 'ML',
'MM', 'MN', 'MO', 'MOBI', 'MODA', 'MONASH', 'MP', 'MQ', 'MR', 'MS', 'MT', 'MU', 'MUSEUM', 'MV', 'MW', 'MX',
'MY', 'MZ', 'NA', 'NAGOYA', 'NAME', 'NC', 'NE', 'NET', 'NEUSTAR', 'NF', 'NG', 'NI', 'NINJA', 'NL', 'NO', 'NP',
'NR', 'NU', 'NZ', 'OKINAWA', 'OM', 'ONION', 'ONL', 'ORG', 'PA', 'PARTNERS', 'PARTS', 'PE', 'PF', 'PG', 'PH',
'PHOTO', 'PHOTOGRAPHY', 'PHOTOS', 'PICS', 'PINK', 'PK', 'PL', 'PLUMBING', 'PM', 'PN', 'POST', 'PR', 'PRO',
'PRODUCTIONS', 'PROPERTIES', 'PS', 'PT', 'PUB', 'PW', 'PY', 'QA', 'QPON', 'RE', 'RECIPES', 'RED', 'RENTALS',
'REPAIR', 'REPORT', 'REVIEWS', 'RICH', 'RO', 'RS', 'RU', 'RUHR', 'RW', 'SA', 'SB', 'SC', 'SD', 'SE', 'SEXY',
'SG', 'SH', 'SHIKSHA', 'SHOES', 'SI', 'SINGLES', 'SJ', 'SK', 'SL', 'SM', 'SN', 'SO', 'SOCIAL', 'SOLAR',
'SOLUTIONS', 'SR', 'ST', 'SU', 'SUPPLIES', 'SUPPLY', 'SUPPORT', 'SV', 'SX', 'SY', 'SYSTEMS', 'SZ', 'TATTOO',
'TC', 'TD', 'TECHNOLOGY', 'TEL', 'TF', 'TG', 'TH', 'TIENDA', 'TIPS', 'TJ', 'TK', 'TL', 'TM', 'TN', 'TO',
'TODAY', 'TOKYO', 'TOOLS', 'TP', 'TR', 'TRAINING', 'TRAVEL', 'TT', 'TV', 'TW', 'TZ', 'UA', 'UG', 'UK', 'UNO',
'US', 'UY', 'UZ', 'VA', 'VACATIONS', 'VC', 'VE', 'VENTURES', 'VG', 'VI', 'VIAJES', 'VILLAS', 'VISION', 'VN',
'VOTE', 'VOTING', 'VOTO', 'VOYAGE', 'VU', 'WANG', 'WATCH', 'WED', 'WF', 'WIEN', 'WIKI', 'WORKS', 'WS',
'XN--3BST00M', 'XN--3DS443G', 'XN--3E0B707E', 'XN--45BRJ9C', 'XN--55QW42G', 'XN--55QX5D', 'XN--6FRZ82G',
'XN--6QQ986B3XL', 'XN--80AO21A', 'XN--80ASEHDB', 'XN--80ASWG', 'XN--90A3AC', 'XN--C1AVG', 'XN--CG4BKI',
'XN--CLCHC0EA0B2G2A9GCD', 'XN--D1ACJ3B', 'XN--FIQ228C5HS', 'XN--FIQ64B', 'XN--FIQS8S', 'XN--FIQZ9S',
'XN--FPCRJ9C3D', 'XN--FZC2C9E2C', 'XN--GECRJ9C', 'XN--H2BRJ9C', 'XN--I1B6B1A6A2E', 'XN--IO0A7I', 'XN--J1AMH',
'XN--J6W193G', 'XN--KPRW13D', 'XN--KPRY57D', 'XN--L1ACC', 'XN--LGBBAT1AD8J', 'XN--MGB9AWBF', 'XN--MGBA3A4F16A',
'XN--MGBAAM7A8H', 'XN--MGBAB2BD', 'XN--MGBAYH7GPA', 'XN--MGBBH1A71E', 'XN--MGBC0A9AZCG', 'XN--MGBERP4A5D4AR',
'XN--MGBX4CD0AB', 'XN--NGBC5AZD', 'XN--NQV7F', 'XN--NQV7FS00EMA', 'XN--O3CW4H', 'XN--OGBPF8FL', 'XN--P1AI',
'XN--PGBS0DH', 'XN--Q9JYB4C', 'XN--RHQV96G', 'XN--S9BRJ9C', 'XN--UNUP4Y', 'XN--WGBH1C', 'XN--WGBL6A',
'XN--XKC2AL3HYE2A', 'XN--XKC2DL3A5EE0H', 'XN--YFRO4I67O', 'XN--YGBI2AMMX', 'XN--ZFR164B', 'XXX', 'XYZ', 'YE',
'YT', 'ZA', 'ZM', 'ZONE', 'ZW']
class Strings(Module):
    '''
    Viper module that extracts printable strings from a sample and reports
    either all of them or only those matching the selected filters (hosts,
    network indicators, filenames, "interesting" strings, custom regex).
    '''
    cmd = 'strings'
    description = 'Extract strings from file'
    authors = ['nex', 'Brian Wallace', 'Christophe Vandeplas']

    def __init__(self):
        super(Strings, self).__init__()
        self.parser.add_argument('-a', '--all', action='store_true', help='Print all strings')
        self.parser.add_argument('-F', '--files', action='store_true', help='Extract filenames from strings')
        self.parser.add_argument('-H', '--hosts', action='store_true', help='Extract IP addresses and domains from strings')
        self.parser.add_argument('-N', '--network', action='store_true', help='Extract various network related strings')
        self.parser.add_argument('-I', '--interesting', action='store_true', help='Extract various interesting strings')
        self.parser.add_argument('-S', '--search', dest='search_string', help='Search for a specfic string')
        self.parser.add_argument('-s', '--scan', action='store_true', help='Scan all files in the project with all the scanners')
        # base 0 lets the key be given as decimal, hex (0x..), octal (0o..) or binary (0b..)
        self.parser.add_argument('-x', '--xor', type=lambda x: int(x, 0), help='Apply xor key prior to searching for strings.')
        self.parser.add_argument('-r', '--rabin2', action='store_true', help='Use r2 izz (aka rabin2 -zz) for string extraction.')

    @staticmethod
    def _dedupe(entries):
        '''
        Return the entries with duplicates removed, keeping first-seen order.
        Uses a set for membership so the cost stays linear on large inputs.
        '''
        seen = set()
        unique = []
        for entry in entries:
            if entry not in seen:
                seen.add(entry)
                unique.append(entry)
        return unique

    @staticmethod
    def _has_known_tld(entry):
        '''
        Return True when the text after the last '.' of entry is listed in TLD.
        '''
        return entry[entry.rfind('.') + 1:].upper() in TLD

    def _log_results(self, title, results):
        '''
        Log a 'success' heading followed by one 'item' line per result.
        Nothing is logged when results is empty.
        '''
        if not results:
            return
        self.log('success', title)
        for result in results:
            self.log('item', result)

    def extract_hosts(self, strings):
        '''
        Return the unique strings that look like an IPv4 address, a valid IPv6
        address or a domain name ending in a known TLD, in first-seen order.
        '''
        matches = []
        for entry in strings:
            if IPV4_REGEX.search(entry):
                matches.append(entry)
            elif IPV6_REGEX.search(entry):
                # The regex is only a pre-filter: the whole string has to parse
                # as an IPv6 address. ValueError covers embedded NUL characters.
                try:
                    inet_pton(AF_INET6, entry)
                except (socket_error, ValueError):
                    continue
                matches.append(entry)
            elif DOMAIN_REGEX.search(entry) and self._has_known_tld(entry):
                matches.append(entry)
        return self._dedupe(matches)

    def extract_network(self, strings):
        '''
        Return the unique strings containing a URL scheme, an HTTP GET/POST
        verb, a Host header, a user-agent or an e-mail address with a known TLD.
        '''
        matches = []
        for entry in strings:
            if (URL_REGEX.search(entry)
                    or GET_POST_REGEX.search(entry)
                    or HOST_REGEX.search(entry)
                    or USERAGENT_REGEX.search(entry)
                    or (EMAIL_REGEX.search(entry) and self._has_known_tld(entry))):
                matches.append(entry)
        return self._dedupe(matches)

    def extract_files(self, strings):
        '''
        Return the unique strings that end in one of the FILE_REGEX extensions.
        '''
        return self._dedupe(entry for entry in strings if FILE_REGEX.search(entry))

    def extract_interesting(self, strings):
        '''
        Return the unique strings that end in .pdb or reference a registry key.
        '''
        matches = []
        for entry in strings:
            if (PDB_REGEX.search(entry)
                    or REGKEY_REGEX.search(entry)
                    or REGKEY2_REGEX.search(entry)):
                matches.append(entry)
        return self._dedupe(matches)

    def extract_search(self, strings, search_regex):
        '''
        Return every string (duplicates included) in which search_regex matches.
        Raises re.error when search_regex is not a valid regular expression.
        '''
        return [entry for entry in strings if re.search(search_regex, entry)]

    def get_strings_r2(self, f, min=4, xor_key=None):
        '''
        Uses r2 izzj to pull strings from the binary at path f.
        Returns a list of the decoded strings whose reported length is at
        least min. An empty list is returned when r2pipe is unavailable or
        when an XOR key is requested (not supported by this extractor).
        '''
        results = []
        # r2pipe is an optional dependency, so it is imported lazily and any
        # failure to load it is reported instead of breaking the whole module.
        try:
            import r2pipe
            from base64 import b64decode
        except Exception as ex:
            self.log('error', 'Failed to import r2pipe. Is r2pipe installed?\n{ex}'.format(ex=ex))
            return results

        # Reject the unsupported combination before spawning radare2.
        if xor_key:
            self.log('error', 'XOR not currently support when using rabin2, try without "-x"')
            return results

        r2 = r2pipe.open(f)
        try:
            # cmdj() yields None when the command output cannot be parsed
            r2results = r2.cmdj('izzj') or []
        finally:
            # always terminate the radare2 child process
            r2.quit()

        for r in r2results:
            if r['length'] >= min:
                results.append(b64decode(r['string']).decode('utf-8'))
        return results

    def get_strings_base(self, f, min=4, rabin_extract=False, xor_key=None):
        '''
        A wrapper method to handle XORing of the file and which string
        extraction method to use.
        -x does not work when using -r (rabin) to extract the strings.
        '''
        if rabin_extract:
            return self.get_strings_r2(f, min, xor_key)
        if xor_key:
            # honour the key handed in by the caller rather than self.args
            return self.get_strings(self.xordata(File(f), xor_key), min)
        return self.get_strings(File(f), min)

    def xordata(self, f, key):
        '''
        Return an IOBytes holding f.data with every byte XORed with key.
        key must be a single-byte value (0-255); anything else raises ValueError.
        '''
        return IOBytes(bytearray(byte ^ key for byte in bytearray(f.data)))

    def get_strings(self, f, min=4):
        '''
        String implementation see http://stackoverflow.com/a/17197027/6880819
        Extended with Unicode support

        Returns the list of runs of printable characters found in f.data that
        are at least min characters long. NUL bytes interleaved with the
        characters (wide strings) are skipped instead of ending the run.
        '''
        # newlines are printable but are to be considered as the end of the string
        allowed = set(string.printable) - {'\n', '\r'}
        results = []
        current = []  # characters of the run being built; joined once at the end
        counter = 1
        wide_word = False
        for c in f.data.decode('utf-8', 'ignore'):
            if c == "\x00":
                # already have something, check if the second byte is a null
                if counter == 2:
                    wide_word = True
                    counter += 1
                    continue
                # every 2 chars we allow a 00
                if wide_word and not counter % 2:
                    counter += 1
                    continue
            elif c in allowed:
                current.append(c)
                counter += 1
                continue
            # anything else terminates the current run
            if len(current) >= min:
                results.append(''.join(current))
            current = []
            counter = 1
            wide_word = False
        if len(current) >= min:  # catch result at EOF
            results.append(''.join(current))
        return results

    def process_strings(self, strings, sample_name=""):
        '''
        Log the strings selected by the command-line flags in self.args.
        When sample_name is given every heading is prefixed with it.
        '''
        prefix = '{} - '.format(sample_name) if sample_name else ''

        if self.args.all:
            self.log('success', '{}All strings:'.format(prefix))
            for entry in strings:
                self.log('', entry)

        # (flag, extractor, heading) for every filter that shares the same reporting
        sections = (
            (self.args.hosts, self.extract_hosts, '{}IP addresses and domains:'),
            (self.args.network, self.extract_network, '{}Network related:'),
            (self.args.files, self.extract_files, '{}Filenames:'),
            (self.args.interesting, self.extract_interesting, '{}Various interesting strings:'),
        )
        for enabled, extractor, heading in sections:
            if enabled:
                self._log_results(heading.format(prefix), extractor(strings))

        if self.args.search_string:
            # quotes typed around the expression are not part of it
            pattern = self.args.search_string.replace('"', '')
            self._log_results('{}Found strings:'.format(prefix), self.extract_search(strings, pattern))

    def run(self):
        '''
        Entry point of the command: validate the arguments, then process
        either every sample of the project (--scan) or the file of the
        currently open session.
        '''
        super(Strings, self).run()
        if self.args is None:
            return
        args = self.args

        if not (args.all or args.files or args.hosts or args.network or args.interesting or args.search_string):
            self.log('error', 'At least one of the parameters is required')
            self.usage()
            return

        # xordata() applies the key to single bytes, so reject anything wider
        # here instead of failing with a traceback in the middle of the run.
        if args.xor is not None and not 0 <= args.xor <= 0xFF:
            self.log('error', 'The xor key must be a single byte value (0-255)')
            return

        # Validate the search expression once, before any sample is processed.
        if args.search_string:
            try:
                re.compile(args.search_string.replace('"', ''))
            except re.error as ex:
                self.log('error', 'Invalid search expression: {}'.format(ex))
                return

        if args.scan:
            db = Database()
            for sample in db.find(key='all'):
                sample_path = get_sample_path(sample.sha256)
                # presumably the path is falsy or stale when the binary is gone
                # from the repository - skip it rather than abort the scan
                if not sample_path or not os.path.exists(sample_path):
                    self.log('error', '{} - sample file not found, skipping'.format(sample.name))
                    continue
                strings = self.get_strings_base(sample_path, xor_key=args.xor, rabin_extract=args.rabin2)
                self.process_strings(strings, sample.name)
            return

        if not __sessions__.is_set():
            self.log('error', "No open session. This command expects a file to be open.")
            return

        current_path = __sessions__.current.file.path
        if not os.path.exists(current_path):
            self.log('error', 'The file of the open session does not exist: {}'.format(current_path))
            return
        strings = self.get_strings_base(current_path, xor_key=args.xor, rabin_extract=args.rabin2)
        self.process_strings(strings)