aboutcode-org · ambuj-1211 · Jul 27, 2024 · Jul 27, 2024 · Aug 20, 2024 · Sep 15, 2024
diff --git a/vulnerabilities/importers/apache_httpd.py b/vulnerabilities/importers/apache_httpd.py
@@ -8,6 +8,7 @@
 #
 
 import logging
+import re
 import urllib
 
 import requests
@@ -23,6 +24,8 @@
 from vulnerabilities.importer import Reference
 from vulnerabilities.importer import VulnerabilitySeverity
 from vulnerabilities.severity_systems import APACHE_HTTPD
+from vulnerabilities.utils import create_weaknesses_list
+from vulnerabilities.utils import cwe_regex
 from vulnerabilities.utils import get_item
 
 logger = logging.getLogger(__name__)
@@ -102,11 +105,14 @@ def to_advisory(self, data):
                 )
             )
 
+        weaknesses = get_weaknesses(data)
+
         return AdvisoryData(
             aliases=[alias],
             summary=description or "",
             affected_packages=affected_packages,
             references=[reference],
+            weaknesses=weaknesses,
             url=reference.url,
         )
 
@@ -152,3 +158,97 @@ def fetch_links(url):
             continue
         links.append(urllib.parse.urljoin(url, link))
     return links
+
+
+def get_weaknesses(cve_data):
+    """
+    Return a list of unique CWE ids found in the ``cve_data`` mapping.
+
+    Two layouts are handled: the legacy CVE JSON 4.0 one (detected through
+    ``CVE_data_meta.ID``), where ids are parsed out of the free-text
+    ``problemtype`` descriptions, and the ``containers.cna`` one, where the
+    ``cweId`` of every ``problemTypes`` description is used.
+    Ids are converted by ``create_weaknesses_list``.
+
+    Examples:
+        >>> mock_cve_data1 = {
+        ...     "containers": {
+        ...         "cna": {
+        ...             "providerMetadata": {
+        ...                 "orgId": "f0158376-9dc2-43b6-827c-5f631a4d8d09"
+        ...             },
+        ...             "title": "mod_macro buffer over-read",
+        ...             "problemTypes": [
+        ...                 {
+        ...                     "descriptions": [
+        ...                         {
+        ...                             "description": "CWE-125 Out-of-bounds Read",
+        ...                             "lang": "en",
+        ...                             "cweId": "CWE-125",
+        ...                             "type": "CWE"
+        ...                         }
+        ...                     ]
+        ...                 }
+        ...             ]
+        ...         }
+        ...     }
+        ... }
+        >>> mock_cve_data2 = {
+        ...     "data_type": "CVE",
+        ...     "data_format": "MITRE",
+        ...     "data_version": "4.0",
+        ...     "generator": {
+        ...         "engine": "Vulnogram 0.0.9"
+        ...     },
+        ...     "CVE_data_meta": {
+        ...         "ID": "CVE-2022-28614",
+        ...         "ASSIGNER": "security@apache.org",
+        ...         "TITLE": "read beyond bounds via ap_rwrite() ",
+        ...         "STATE": "PUBLIC"
+        ...     },
+        ...     "problemtype": {
+        ...         "problemtype_data": [
+        ...             {
+        ...                 "description": [
+        ...                     {
+        ...                         "lang": "eng",
+        ...                         "value": "CWE-190 Integer Overflow or Wraparound"
+        ...                     }
+        ...                 ]
+        ...             },
+        ...             {
+        ...                 "description": [
+        ...                     {
+        ...                         "lang": "eng",
+        ...                         "value": "CWE-200 Exposure of Sensitive Information to an Unauthorized Actor"
+        ...                     }
+        ...                 ]
+        ...             }
+        ...         ]
+        ...     }
+        ... }
+
+        >>> get_weaknesses(mock_cve_data1)
+        [125]
+
+        >>> get_weaknesses(mock_cve_data2)
+        [190, 200]
+    """
+    cwe_strings = []
+    if get_item(cve_data, "CVE_data_meta", "ID"):
+        # Legacy layout: CWE ids are embedded in free-text description values.
+        problemtype_data = get_item(cve_data, "problemtype", "problemtype_data") or []
+        for problem in problemtype_data:
+            for desc in problem.get("description") or []:
+                cwe_strings.extend(re.findall(cwe_regex, desc.get("value") or ""))
+    else:
+        # containers.cna layout: read every problemTypes entry, not only the first.
+        cna = (cve_data.get("containers") or {}).get("cna") or {}
+        for problem_type in cna.get("problemTypes") or []:
+            for description in problem_type.get("descriptions") or []:
+                cwe_id_string = description.get("cweId")
+                if cwe_id_string:
+                    cwe_strings.append(cwe_id_string)
+
+    # Drop duplicates but keep first-seen order so the result is deterministic.
+    return create_weaknesses_list(list(dict.fromkeys(cwe_strings)))
diff --git a/vulnerabilities/importers/debian.py b/vulnerabilities/importers/debian.py
@@ -8,12 +8,14 @@
 #
 
 import logging
+import re
 from typing import Any
 from typing import Iterable
 from typing import List
 from typing import Mapping
 
 import requests
+from cwe2.database import Database
 from packageurl import PackageURL
 from univers.version_range import DebianVersionRange
 from univers.versions import DebianVersion
@@ -22,6 +24,7 @@
 from vulnerabilities.importer import AffectedPackage
 from vulnerabilities.importer import Importer
 from vulnerabilities.importer import Reference
+from vulnerabilities.utils import create_weaknesses_list
 from vulnerabilities.utils import dedupe
 from vulnerabilities.utils import get_item
 
@@ -93,6 +96,7 @@ def advisory_data(self) -> Iterable[AdvisoryData]:
             yield from self.parse(pkg_name, records)
 
     def parse(self, pkg_name: str, records: Mapping[str, Any]) -> Iterable[AdvisoryData]:
+
         for cve_id, record in records.items():
             affected_versions = []
             fixed_versions = []
@@ -150,10 +154,29 @@ def parse(self, pkg_name: str, records: Mapping[str, Any]) -> Iterable[AdvisoryD
                         fixed_version=DebianVersion(fixed_version),
                     )
                 )
+            weaknesses = get_cwe_from_debian_advisory(record)
+
             yield AdvisoryData(
                 aliases=[cve_id],
                 summary=record.get("description", ""),
                 affected_packages=affected_packages,
                 references=references,
+                weaknesses=weaknesses,
                 url=self.api_url,
             )
+
+
+def get_cwe_from_debian_advisory(record):
+    """
+    Return a list of CWE IDs mentioned in the "description" text of a Debian ``record``.
+
+        >>> get_cwe_from_debian_advisory({"description":"PEAR HTML_QuickForm version 3.2.14 contains an eval injection (CWE-95) vulnerability in HTML_QuickForm's getSubmitValue method, HTML_QuickForm's validate method, HTML_QuickForm_hierselect's _setOptions method, HTML_QuickForm_element's _findValue method, HTML_QuickForm_element's _prepareValue method. that can result in Possible information disclosure, possible impact on data integrity and execution of arbitrary code. This attack appear to be exploitable via A specially crafted query string could be utilised, e.g. http://www.example.com/admin/add_practice_type_id[1]=fubar%27])%20OR%20die(%27OOK!%27);%20//&mode=live. This vulnerability appears to have been fixed in 3.2.15."})
+        [95]
+        >>> get_cwe_from_debian_advisory({"description":"There is no WEAKNESS DATA"})
+        []
+    """
+    text = record.get("description")
+    if not text:
+        text = ""
+    found = re.findall(r"CWE-\d+", text)
+    return create_weaknesses_list(found)
diff --git a/vulnerabilities/importers/fireeye.py b/vulnerabilities/importers/fireeye.py
@@ -16,6 +16,8 @@
 from vulnerabilities.importer import Importer
 from vulnerabilities.importer import Reference
 from vulnerabilities.utils import build_description
+from vulnerabilities.utils import create_weaknesses_list
+from vulnerabilities.utils import cwe_regex
 from vulnerabilities.utils import dedupe
 
 logger = logging.getLogger(__name__)
@@ -77,10 +79,13 @@ def parse_advisory_data(raw_data, file, base_path) -> AdvisoryData:
     disc_credits = md_dict.get("## Discovery Credits")  # not used
     disc_timeline = md_dict.get("## Disclosure Timeline")  # not used
     references = md_dict.get("## References") or []
+    cwe_data = md_dict.get("## Common Weakness Enumeration") or []
+
     return AdvisoryData(
         aliases=get_aliases(database_id, cve_ref),
         summary=build_description(" ".join(summary), " ".join(description)),
         references=get_references(references),
+        weaknesses=get_weaknesses(cwe_data),
         url=advisory_url,
     )
 
@@ -140,3 +145,22 @@ def md_list_to_dict(md_list):
         else:
             md_dict[md_key].append(md_line)
     return md_dict
+
+
+def get_weaknesses(cwe_data):
+    """
+    Return a list of integer CWE IDs parsed from ``cwe_data``, a list of weakness lines.
+
+        >>> get_weaknesses([
+        ... "CWE-379: Creation of Temporary File in Directory with Insecure Permissions",
+        ... "CWE-362: Concurrent Execution using Shared Resource with Improper Synchronization ('Race Condition')"
+        ... ])
+        [379, 362]
+    """
+    # Gather every ``cwe_regex`` match, line by line, preserving input order.
+    cwe_strings = [
+        cwe_string
+        for weakness_line in cwe_data
+        for cwe_string in re.findall(cwe_regex, weakness_line)
+    ]
+    return create_weaknesses_list(cwe_strings)
diff --git a/vulnerabilities/tests/test_data/apache_httpd/CVE-2021-44224-apache-httpd-expected.json b/vulnerabilities/tests/test_data/apache_httpd/CVE-2021-44224-apache-httpd-expected.json
@@ -32,6 +32,6 @@
     }
   ],
   "date_published": null,
-  "weaknesses": [],
+  "weaknesses": [476],
   "url": "https://httpd.apache.org/security/json/CVE-2021-44224.json"
 }
diff --git a/vulnerabilities/tests/test_data/apache_httpd/CVE-2022-28614-apache-httpd-expected.json b/vulnerabilities/tests/test_data/apache_httpd/CVE-2022-28614-apache-httpd-expected.json
@@ -32,6 +32,6 @@
     }
   ],
   "date_published": null,
-  "weaknesses": [],
+  "weaknesses": [190, 200],
   "url": "https://httpd.apache.org/security/json/CVE-2022-28614.json"
 }
diff --git a/vulnerabilities/tests/test_data/apache_httpd/apache-httpd-improver-expected.json b/vulnerabilities/tests/test_data/apache_httpd/apache-httpd-improver-expected.json
@@ -54,7 +54,7 @@
         ]
       }
     ],
-    "weaknesses": []
+    "weaknesses": [476]
   },
   {
     "vulnerability_id": null,
@@ -103,6 +103,6 @@
         ]
       }
     ],
-    "weaknesses": []
+    "weaknesses": [476]
   }
 ]
diff --git a/vulnerabilities/tests/test_debian.py b/vulnerabilities/tests/test_debian.py
@@ -6,13 +6,14 @@
 # See https://github.com/aboutcode-org/vulnerablecode for support or download.
 # See https://aboutcode.org for more information about nexB OSS projects.
 #
-
 import json
 import os
+import re
 from unittest.mock import patch
 
 from vulnerabilities.importer import AdvisoryData
 from vulnerabilities.importers.debian import DebianImporter
+from vulnerabilities.importers.debian import get_cwe_from_debian_advisory
 from vulnerabilities.improvers.default import DefaultImprover
 from vulnerabilities.improvers.valid_versions import DebianBasicImprover
 from vulnerabilities.tests import util_tests
@@ -55,3 +56,38 @@ def test_debian_improver(mock_response):
             result.extend(inference)
     expected_file = os.path.join(TEST_DATA, f"debian-improver-expected.json")
     util_tests.check_results_against_json(result, expected_file)
+
+
+def test_get_cwe_from_debian_advisories():
+    """A "CWE-580" mention inside the record description is returned as [580]."""
+    advisory_record = {
+        "description": "Legion of the Bouncy Castle Legion of the Bouncy Castle Java Cryptography APIs 1.58 up to but not including 1.60 contains a CWE-580: Use of Externally-Controlled Input to Select Classes or Code ('Unsafe Reflection') vulnerability in XMSS/XMSS^MT private key deserialization that can result in Deserializing an XMSS/XMSS^MT private key can result in the execution of unexpected code. This attack appear to be exploitable via A handcrafted private key can include references to unexpected classes which will be picked up from the class path for the executing application. This vulnerability appears to have been fixed in 1.60 and later.",
+        "scope": "local",
+        "releases": {
+            "bookworm": {
+                "status": "resolved",
+                "repositories": {"bookworm": "1.72-2"},
+                "fixed_version": "1.60-1",
+                "urgency": "low",
+            },
+            "bullseye": {
+                "status": "resolved",
+                "repositories": {"bullseye": "1.68-2"},
+                "fixed_version": "1.60-1",
+                "urgency": "low",
+            },
+            "sid": {
+                "status": "resolved",
+                "repositories": {"sid": "1.77-1"},
+                "fixed_version": "1.60-1",
+                "urgency": "low",
+            },
+            "trixie": {
+                "status": "resolved",
+                "repositories": {"trixie": "1.77-1"},
+                "fixed_version": "1.60-1",
+                "urgency": "low",
+            },
+        },
+    }
+    assert get_cwe_from_debian_advisory(advisory_record) == [580]
diff --git a/vulnerabilities/tests/test_fireeye.py b/vulnerabilities/tests/test_fireeye.py
@@ -13,6 +13,7 @@
 from vulnerabilities.importer import Reference
 from vulnerabilities.importers.fireeye import get_aliases
 from vulnerabilities.importers.fireeye import get_references
+from vulnerabilities.importers.fireeye import get_weaknesses
 from vulnerabilities.importers.fireeye import md_list_to_dict
 from vulnerabilities.importers.fireeye import parse_advisory_data
 from vulnerabilities.tests import util_tests
@@ -217,3 +218,19 @@ def test_md_list_to_dict_2(self):
             md_list = f.readlines()
             md_dict = md_list_to_dict(md_list)
             assert md_dict == expected_output
+
+    def test_get_weaknesses(self):
+        """Check the ids returned for two listed weaknesses and for an unknown id."""
+        # Two weakness lines: the ids are expected back in input order.
+        known_weaknesses = [
+            "CWE-379: Creation of Temporary File in Directory with Insecure Permissions",
+            "CWE-362: Concurrent Execution using Shared Resource with Improper Synchronization ('Race Condition')",
+        ]
+        assert get_weaknesses(known_weaknesses) == [379, 362]
+
+        # Per the fixture text itself, CWE-2345 does not exist, so no id
+        # is expected in the result for this line.
+        unknown_weaknesses = [
+            "CWE-2345: This cwe id does not exist so it should generate Invalid CWE id error and return empty list."
+        ]
+        assert get_weaknesses(unknown_weaknesses) == []