-
Notifications
You must be signed in to change notification settings - Fork 17
/
vt_intelligence_hunting_downloader.py
157 lines (137 loc) · 7.24 KB
/
vt_intelligence_hunting_downloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import argparse
import datetime
import os
import time
import pandas as pd
from library.files import sha256_file
from virus_total_apis import IntelApi
def _build_arg_parser():
    """Return the argparse parser describing the downloader's command line."""
    parser = argparse.ArgumentParser(
        description='Downloads samples from VT Intelligence.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('OutputDirectory',
                        help='The output directory for the samples.')
    parser.add_argument("-a", "--apikey",
                        help="Your VT Intelligence API key.",
                        required=True)
    parser.add_argument("-p", "--positives",
                        help="Detections must have at least this many positives.",
                        type=int, default=25)
    parser.add_argument("-n", "--number_of_samples",
                        help="The number of files to download. "
                             "Set to zero for all downloads.",
                        type=int, default=0)
    parser.add_argument("-d", "--delete_downloaded",
                        help="Delete downloaded samples and metadata from feed.",
                        action='store_true')
    parser.add_argument("-dn", "--delete_non_matches",
                        help="Delete samples that do not match from feed.",
                        action='store_true')
    parser.add_argument("-dd", "--dont_download_sample",
                        help="Enable to just get metadata, without downloading samples.",
                        action='store_true')
    return parser


def _fetch_notifications_page(intel_api, page_token):
    """Fetch one page of the notifications feed, retrying every 60s on error.

    Args:
        intel_api: Client object exposing ``get_intel_notifications_feed``.
        page_token: Cursor for the page to fetch; ``None`` for the first page.

    Returns:
        A ``(page, next_token)`` tuple where ``page`` is the ``'results'``
        dict of the response and ``next_token`` is its ``'next'`` entry
        (``None`` when absent).
    """
    while True:
        response = intel_api.get_intel_notifications_feed(page_token)
        page = response.get('results') if isinstance(response, dict) else None
        # The error check must come BEFORE anything is read out of the page:
        # indexing first would raise KeyError on an error response and would
        # overwrite the cursor with garbage.
        # NOTE(review): the error marker is looked for both at the top level
        # and inside 'results' because the client's failure shape is not
        # visible from this file -- confirm against the client library.
        if (isinstance(page, dict)
                and 'error' not in response
                and 'error' not in page):
            return page, page.get('next')
        print("\tError downloading hashes, retrying...")
        time.sleep(60)


def _download_verified(intel_api, sha256, subdir, filename):
    """Download ``sha256`` into ``subdir`` and loop until its hash verifies.

    ``filename`` is the path the sample is expected to land at
    (``subdir/sha256``); it is hashed with ``sha256_file`` after each attempt.

    Raises:
        KeyboardInterrupt: re-raised after removing any partial download.
    """
    os.makedirs(subdir, exist_ok=True)
    expected_hash = sha256.upper()
    # NOTE(review): as in the original, there is no cap or delay on retries
    # after a hash mismatch; an exhausted quota will loop here until CTRL-C.
    while True:
        try:
            intel_api.get_file(sha256, subdir)
        except KeyboardInterrupt:
            # Remove a possibly partial file so a later run does not treat it
            # as an already-downloaded sample.
            if os.path.isfile(filename):
                os.remove(filename)
            raise
        print("\t\tDownloaded {0}".format(sha256))
        print("\t\tVerifying hash...")
        dl_hash = sha256_file(filename).upper()
        if expected_hash == dl_hash:
            print("\t\t\tHash verified!")
            return
        print("\t**** DOWNLOAD ERROR!  SHA256 Does not match!")
        print("\t\tExpected SHA256:   {0}".format(expected_hash))
        print("\t\tCalculated SHA256: {0}".format(dl_hash))
        print("\t\tWill not delete this sample from the feed.")
        print("\t\tHave you exceeded your quota?")


def _process_match(intel_api, notification, args):
    """Download (or skip) one notification that met the positives threshold.

    Returns:
        ``True`` if the notification counted as a new download (the sample
        file was not already on disk), ``False`` for a duplicate.
    """
    sha256 = notification['sha256']
    subdir = os.path.join(args.OutputDirectory,
                          notification['ruleset_name'],
                          notification['subject'])
    filename = os.path.join(subdir, sha256)

    if os.path.isfile(filename):
        print("\tDeleting duplicate sample from feed...")
        if args.delete_downloaded:
            intel_api.delete_intel_notifications([notification['id']])
        return False

    print("\tDownloading {0}".format(sha256))
    if args.dont_download_sample:
        print("\t\tSkipping sample download, downloading metadata...")
    else:
        _download_verified(intel_api, sha256, subdir, filename)
    if args.delete_downloaded:
        print("\t\tDeleting downloaded sample from feed...")
        intel_api.delete_intel_notifications([notification['id']])
    return True


def _notification_to_series(notification):
    """Flatten a notification and its ``'scans'`` mapping into one Series.

    The resulting Series is named after the sample's SHA256 so that it
    becomes the row label when the rows are assembled into a DataFrame.
    """
    sha256 = notification['sha256']
    # pd.concat replaces Series.append, which was removed in pandas 2.0.
    row = pd.concat([pd.Series(notification, name=sha256),
                     pd.Series(notification['scans'], name=sha256)])
    row.name = sha256
    return row


def main():
    """Download samples and metadata from the VT Intelligence hunting feed.

    Parses the command line, walks the notifications feed page by page,
    downloads every sample with at least ``--positives`` detections into
    ``OutputDirectory/<ruleset_name>/<subject>/<sha256>``, optionally deletes
    processed notifications from the feed, and finally writes the collected
    metadata to a timestamped HDF file in ``OutputDirectory``.

    CTRL-C stops the feed walk early; the metadata gathered so far is still
    written out.
    """
    args = _build_arg_parser().parse_args()

    os.makedirs(args.OutputDirectory, exist_ok=True)

    intel_api = IntelApi(args.apikey)

    limit = args.number_of_samples  # 0 means "no limit"
    downloads = 0
    nextpage = None
    rows_to_add = []

    while True:
        try:
            page, nextpage = _fetch_notifications_page(intel_api, nextpage)

            print("Downloading hashes for samples...")

            for notification in page['notifications']:
                if int(notification['positives']) >= args.positives:
                    if _process_match(intel_api, notification, args):
                        downloads += 1
                        print("\t\tDownloaded {0:,} samples...".format(downloads))
                    rows_to_add.append(_notification_to_series(notification))
                elif args.delete_non_matches:
                    # Delete the notification if it does not match
                    intel_api.delete_intel_notifications([notification['id']])

                if limit > 0 and downloads >= limit:
                    break

            if nextpage is None or (limit > 0 and downloads >= limit):
                break
        except KeyboardInterrupt:
            print("Caught CTRL-C!")
            break

    print("Assembling HDF...")
    # Building the frame directly replaces DataFrame.append (removed in
    # pandas 2.0); each Series becomes one row labelled by its name.
    df = pd.DataFrame(rows_to_add)

    now = datetime.datetime.now()
    # Zero-padded date/time fields followed by the unpadded microsecond.
    now_str = "{0:%Y_%m_%d_%H_%M_%S}_{1}".format(now, now.microsecond)

    print("Writing metadata HDF...")
    df.to_hdf(os.path.join(args.OutputDirectory,
                           "vti_metadata_{0}.hdf".format(now_str)), 'data')
    print("Downloaded {0:,} Total Samples".format(downloads))
# Run the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()