-
Notifications
You must be signed in to change notification settings - Fork 17
/
extract_rwe_features.py
66 lines (58 loc) · 2.51 KB
/
extract_rwe_features.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import argparse
from library.utils import Utils
import numpy
import os
import time
import shutil
import sys
def main(arguments=None):
    """Parse command-line options and extract RWE features from a directory.

    Builds the argument parser, validates the job count and the
    comma-separated data point list, then passes the parsed values to
    ``Utils.extract_rwe_features_from_directory``.

    Args:
        arguments: Optional list or tuple of command-line tokens (without the
            program name). For any other value, including ``None``, argparse
            reads ``sys.argv[1:]``.

    Raises:
        SystemExit: Raised by argparse. Status 2 is used when the arguments
            cannot be parsed, when ``--jobs`` is below 1, or when
            ``--datapoints`` contains an entry that is not an integer.
    """
    # Argument parsing
    parser = argparse.ArgumentParser(
        description='Calculates the RWE features from a directory of files.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('MalwareDirectory',
                        help='The directory containing malware to analyze.')
    parser.add_argument('DataDirectory',
                        help='The directory that will contain the data files.')
    parser.add_argument("-w", "--window", type=int,
                        help="Window size, in bytes, for running entropy.")
    parser.add_argument("-n", "--nonormalize", action='store_true',
                        help="Disables entropy normalization.")
    parser.add_argument("-d", "--datapoints", type=str, default='512',
                        help="The number of data points to sample running "
                             "window entropy. Multiple datapoints can be "
                             "identified as comma separated values without "
                             "spaces.")
    parser.add_argument("-j", "--jobs", type=int, default=1,
                        help="The number of jobs to do the work, must be 1 "
                             "or greater.")

    # Only an explicit sequence overrides the process arguments; passing
    # None lets argparse fall back to sys.argv[1:].
    argv = list(arguments) if isinstance(arguments, (list, tuple)) else None
    args = parser.parse_args(argv)

    # parser.error() prints the usage text to stderr and exits with status 2,
    # so a rejected value is not reported to the shell as success.
    if args.jobs < 1:
        parser.error("Jobs must be 1 or greater.")

    # Turn the comma-separated --datapoints string into a list of integers.
    # An empty string leaves the value as None.
    datapoints = None
    if args.datapoints:
        try:
            datapoints = [int(token.strip())
                          for token in args.datapoints.split(',')]
        except ValueError:
            parser.error("Data points must be comma separated integers, "
                         "got '{0}'.".format(args.datapoints))

    # Crawl the directories for malware and calculate rwe
    Utils.extract_rwe_features_from_directory(args.MalwareDirectory,
                                              args.DataDirectory,
                                              window_size=args.window,
                                              normalize=not args.nonormalize,
                                              number_of_data_points=datapoints,
                                              njobs=args.jobs)
if __name__ == "__main__":
    # Script entry point: hand the command-line tokens (program name
    # excluded) straight to main().
    main(sys.argv[1:])