def get_weaknesses(cve_data):
    """
    Return a list of unique CWE ids, as integers, found in ``cve_data``.

    ``cve_data`` is a CVE record mapping in one of two layouts:

    - the legacy layout, recognised by a ``CVE_data_meta.ID`` value, where CWE
      ids are parsed out of the free-text
      ``problemtype.problemtype_data[].description[].value`` strings;
    - the ``containers.cna`` layout, where CWE ids are read from
      ``problemTypes[].descriptions[].cweId``. When an entry has no ``cweId``
      its ``description`` text is searched instead.

    Every problem type entry is inspected, not only the first one. Ids are
    returned in order of first appearance; validation of each id is delegated
    to ``create_weaknesses_list``.

    Args:
        cve_data (dict): The CVE data in a dictionary format.

    Returns:
        List[int]: A list of unique CWE IDs.

    Examples:
        >>> mock_cve_data1 = {
        ...     "containers": {
        ...         "cna": {
        ...             "title": "mod_macro buffer over-read",
        ...             "problemTypes": [
        ...                 {
        ...                     "descriptions": [
        ...                         {
        ...                             "description": "CWE-125 Out-of-bounds Read",
        ...                             "lang": "en",
        ...                             "cweId": "CWE-125",
        ...                             "type": "CWE"
        ...                         }
        ...                     ]
        ...                 },
        ...                 {
        ...                     "descriptions": [
        ...                         {
        ...                             "description": "CWE-476 NULL Pointer Dereference",
        ...                             "lang": "en",
        ...                             "type": "CWE"
        ...                         }
        ...                     ]
        ...                 }
        ...             ]
        ...         }
        ...     }
        ... }
        >>> mock_cve_data2 = {
        ...     "data_type": "CVE",
        ...     "data_format": "MITRE",
        ...     "data_version": "4.0",
        ...     "CVE_data_meta": {
        ...         "ID": "CVE-2022-28614",
        ...         "ASSIGNER": "security@apache.org",
        ...         "TITLE": "read beyond bounds via ap_rwrite() ",
        ...         "STATE": "PUBLIC"
        ...     },
        ...     "problemtype": {
        ...         "problemtype_data": [
        ...             {
        ...                 "description": [
        ...                     {
        ...                         "lang": "eng",
        ...                         "value": "CWE-190 Integer Overflow or Wraparound"
        ...                     }
        ...                 ]
        ...             },
        ...             {
        ...                 "description": [
        ...                     {
        ...                         "lang": "eng",
        ...                         "value": "CWE-200 Exposure of Sensitive Information to an Unauthorized Actor"
        ...                     },
        ...                     {
        ...                         "lang": "eng",
        ...                         "value": "CWE-190 Integer Overflow or Wraparound"
        ...                     }
        ...                 ]
        ...             }
        ...         ]
        ...     }
        ... }

        >>> get_weaknesses(mock_cve_data1)
        [125, 476]

        >>> get_weaknesses(mock_cve_data2)
        [190, 200]
    """
    cwe_strings = []

    if get_item(cve_data, "CVE_data_meta", "ID"):
        # Legacy layout: CWE ids only appear inside free-text values.
        problemtype_data = get_item(cve_data, "problemtype", "problemtype_data") or []
        for problem in problemtype_data:
            for desc in problem.get("description") or []:
                cwe_strings.extend(re.findall(cwe_regex, desc.get("value") or ""))
    else:
        # ``or {}`` / ``or []`` also cover keys that are present but null.
        cna = (cve_data.get("containers") or {}).get("cna") or {}
        for problem_type in cna.get("problemTypes") or []:
            for desc in problem_type.get("descriptions") or []:
                cwe_id_string = desc.get("cweId")
                if cwe_id_string:
                    cwe_strings.append(cwe_id_string)
                else:
                    # No structured id: fall back to the free text, as the
                    # legacy branch does.
                    cwe_strings.extend(re.findall(cwe_regex, desc.get("description") or ""))

    # dict.fromkeys drops repeated ids while keeping first-seen order.
    unique_cwe_strings = list(dict.fromkeys(cwe_strings))
    return create_weaknesses_list(unique_cwe_strings)
def get_cwe_from_debian_advisory(record):
    """
    Return a list of unique CWE ids, as integers, mentioned in the
    ``description`` text of a Debian advisory ``record`` mapping.

    A missing or empty description yields an empty list. Ids are returned in
    order of first appearance; validation of each id is delegated to
    ``create_weaknesses_list``.

    >>> get_cwe_from_debian_advisory({"description":"PEAR HTML_QuickForm version 3.2.14 contains an eval injection (CWE-95) vulnerability in HTML_QuickForm's getSubmitValue method. This vulnerability appears to have been fixed in 3.2.15."})
    [95]
    >>> get_cwe_from_debian_advisory({"description":"eval injection (CWE-95), see also CWE-95"})
    [95]
    >>> get_cwe_from_debian_advisory({"description":"There is no WEAKNESS DATA"})
    []
    >>> get_cwe_from_debian_advisory({})
    []
    """
    # Function-scope import: reuse the pattern shared by the other importers
    # rather than keeping a private copy of it in this module.
    from vulnerabilities.utils import cwe_regex

    description = record.get("description") or ""
    cwe_strings = re.findall(cwe_regex, description)
    # dict.fromkeys drops repeated mentions while keeping first-seen order.
    unique_cwe_strings = list(dict.fromkeys(cwe_strings))
    return create_weaknesses_list(unique_cwe_strings)
def get_weaknesses(cwe_data):
    """
    Return the CWE ids, as integers, found in ``cwe_data``, a list of weakness
    summary lines.

    Each line is scanned for CWE identifiers; the collected identifiers are
    then converted by ``create_weaknesses_list``.

    >>> get_weaknesses([
    ...    "CWE-379: Creation of Temporary File in Directory with Insecure Permissions",
    ...    "CWE-362: Concurrent Execution using Shared Resource with Improper Synchronization ('Race Condition')"
    ... ])
    [379, 362]
    """
    found_cwe_strings = [
        cwe_string for summary in cwe_data for cwe_string in re.findall(cwe_regex, summary)
    ]
    return create_weaknesses_list(found_cwe_strings)
def test_get_cwe_from_debian_advisories():
    """A CWE id embedded in an advisory description is extracted as an integer."""
    description = (
        "Legion of the Bouncy Castle Legion of the Bouncy Castle Java Cryptography APIs "
        "1.58 up to but not including 1.60 contains a CWE-580: Use of Externally-Controlled "
        "Input to Select Classes or Code ('Unsafe Reflection') vulnerability in "
        "XMSS/XMSS^MT private key deserialization that can result in Deserializing an "
        "XMSS/XMSS^MT private key can result in the execution of unexpected code. This "
        "attack appear to be exploitable via A handcrafted private key can include "
        "references to unexpected classes which will be picked up from the class path for "
        "the executing application. This vulnerability appears to have been fixed in 1.60 "
        "and later."
    )

    # The releases share status, fixed version and urgency; only the version
    # listed under "repositories" differs from one release to the next.
    repository_versions = {
        "bookworm": "1.72-2",
        "bullseye": "1.68-2",
        "sid": "1.77-1",
        "trixie": "1.77-1",
    }
    releases = {
        release: {
            "status": "resolved",
            "repositories": {release: version},
            "fixed_version": "1.60-1",
            "urgency": "low",
        }
        for release, version in repository_versions.items()
    }

    record = {
        "description": description,
        "scope": "local",
        "releases": releases,
    }

    assert get_cwe_from_debian_advisory(record) == [580]
def create_weaknesses_list(cwe_strings: list) -> list:
    """
    Return a list of unique integer CWE ids built from ``cwe_strings``, an
    iterable of CWE strings such as ``"CWE-125"``.

    A single bare string is treated as a one-element list. Empty entries are
    skipped. Entries that cannot be converted to an id, and ids rejected by
    the CWE database, are logged and skipped rather than aborting the whole
    conversion. Ids are returned in order of first appearance.

    >>> create_weaknesses_list(["CWE-125","CWE-379"])
    [125, 379]
    >>> create_weaknesses_list(["CWE-125", "", "CWE-125"])
    [125]
    >>> create_weaknesses_list("CWE-125")
    [125]
    >>> create_weaknesses_list([])
    []
    """
    if isinstance(cwe_strings, str):
        # Iterating a bare string would feed single characters to get_cwe_id.
        cwe_strings = [cwe_strings]

    weaknesses = []
    seen_cwe_ids = set()
    db = Database()
    for cwe_string in cwe_strings:
        if not cwe_string:
            continue

        try:
            cwe_id = get_cwe_id(cwe_string)
        except (IndexError, ValueError) as e:
            # NOTE(review): assumes get_cwe_id signals a malformed string with
            # one of these exceptions (it ends in ``int(...)``) - confirm.
            logger.error("Invalid CWE id %r: %s", cwe_string, e)
            continue

        if not cwe_id:
            logger.error("Invalid CWE id: No CWE ID found")
            continue

        if cwe_id in seen_cwe_ids:
            # Already accepted or already reported; do not look it up again.
            continue
        seen_cwe_ids.add(cwe_id)

        try:
            # The lookup is only used for validation; its result is discarded.
            db.get(cwe_id)
        except InvalidCWEError as e:
            logger.error("Error: %s", e)
            continue

        weaknesses.append(cwe_id)
    return weaknesses