-
Notifications
You must be signed in to change notification settings - Fork 1
/
countCWE.py
176 lines (154 loc) · 5.86 KB
/
countCWE.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
#! /usr/bin/env python
##
# Author: Preston Ruff
# Filename: countCWE.py
##
"""
Collect, count, and sort CWE references from CVE data feeds.
Analyzes CVE data from NVD JSON data feeds and sorts contained CWE references
by decreasing frequency of occurence.
Bugs: the optional CWE research view input file does not have the names of CWEs
of the type 'category' so no text will be seen in the name section for those CWEs
in the file outputed by countCWE.py
"""
import argparse
import json
import csv
import sys
import os
def sumFreq(cweList):
"""Determine frequency of CWE occurences and sort by frequency
Frequency is determined by the number of lines which contain a respective
CWE-ID within cweList.
Args:
cweList: an unsorted list containing CWE-ID's,
many of which are repeated
Returns:
A tuple which contains the following lists:
1. minimized (no repeating elements) list of cweID's
2. frequency list of CWE-ID occurences.
The above lists are sorted together using the frequency values.
Todo:
If I decide to get more fields from the CWE CSV dictionary file, I can simply change my titledict values to be the entire row of the input csv file (except for the raw ID number in the first column), then I can call python's csv library and provide a given string to the csv function as long as I make the string iterable by doing something like iter(string) before giving it to the csv parsing function. Then I can index each field such as the CWE description field/column
"""
cweList.sort()
idList = list()
freqList = list()
freq = 0
lastFreq = 0
tmp = ""
line = ""
tmp = cweList[0]
if tmp:
freq = 1
for line in cweList[1:]:
if line == tmp:
freq = freq + 1
else:
idList.append(tmp)
freqList.append(freq)
tmp = line
freq = 1
lastFreq = freq
idList.append(line)
freqList.append(lastFreq)
freqList, idList = zip(*sorted(zip(freqList, idList), reverse=True))
return (freqList, idList)
def main():
#command line interface
parser = argparse.ArgumentParser()
parser.add_argument('-n', '--namefile', help="a NIST CSV file \
which contains the CWE ID number data in column 1 \
and the CWE name data in column 2. This is used to map the \
CWE list to Names/Titles of the respective ID's")
parser.add_argument("outputfile", help="Output filename for CSV file.")
parser.add_argument("writemethod", choices=['w', 'a'], \
help="Output file write method:'w' flag to overwrite existing file or \
create a new file.'a' flag to append to existing file.")
parser.add_argument("inputfile", nargs='+', \
help="Input filename(s) for JSON input files (separate names \
by a space).")
args = parser.parse_args()
nameIn = args.namefile
fileOut = args.outputfile
fileInList = args.inputfile
if args.writemethod == 'a':
if os.path.isfile(args.outputfile) == False:
print("Write Method Error: cannot use 'a' option if outputfile \
does not exist")
print("Hint: change 'a' flag to 'w' flag to create a new output \
file (or overwrite an existing file!)")
sys.exit()
#save individual/raw CWE-ID references
cweList = list()
repeatList = list()
for fp in fileInList:
with open(fp, 'r') as fpIn:
try:
obj = json.load(fpIn)
except ValueError:
print("Error: failure when trying to load inputfile:" "'",fp,"'")
print("Hint: retry using an input file with a JSON format")
print("exiting now..")
sys.exit()
for item in obj['CVE_Items']:
num = 0
cve = item['cve']
probtype = cve['problemtype']
for field in probtype['problemtype_data']:
cwe = field['description']
for el in cwe:
cweList.append(el['value'])
repeatList.append(el['value'])
num = num + 1
if num > 1:
cve = cve['CVE_data_meta']
cve = cve['ID']
print(cve, "has %s CWEs" % num)
for i in repeatList:
print("\t %s" % i)
del repeatList[:]
row = str()
freqList = list()
idList = list()
#a tuple containing two list elements
freqList = sumFreq(cweList)
idList = freqList[1]
freqList = freqList[0]
#for each title/name found, save it to a corresponding CWE-ID
if nameIn is not None:
titleDict = dict()
titlef = open(nameIn, 'r', newline='')
readtitles = csv.reader(titlef)
next(readtitles, None) #skip CSV header line
for rowA in idList:
cweID_A = rowA[4:]
for rowB in readtitles:
title = rowB[1]
cweID_B = rowB[0]
if cweID_B == cweID_A:
titleDict[rowA] = title
titlef.seek(0,0)
titlef.close()
if args.writemethod == 'w':
csvFile = open(fileOut, 'w', newline='')
else:
csvFile = open(fileOut, 'a', newline='')
writer = csv.writer(csvFile)
if args.writemethod == 'w':
if nameIn is None:
writer.writerow(("CWE ID", "Frequency"))
else:
writer.writerow(("CWE ID", "Frequency", "Name"))
freqCount = 0
for i, val in enumerate(freqList):
if nameIn is None or idList[i] not in titleDict:
row = idList[i], str(val)
else:
row = idList[i], str(val), titleDict[idList[i]]
writer.writerow(row)
freqCount = freqCount + int(val)
csvFile.close()
print(freqCount, "CWE elements processed. Of those, %s were unique." % (i+1))
if __name__ == "__main__":
main()