Skip to content

Commit

Permalink
Add vulnerability support for discovered dependencies aboutcode-org#835
Browse files Browse the repository at this point in the history
… (aboutcode-org#846)

Signed-off-by: Thomas Druez <[email protected]>
  • Loading branch information
tdruez authored Aug 3, 2023
1 parent a215650 commit bda3a70
Show file tree
Hide file tree
Showing 18 changed files with 176 additions and 85 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,14 @@ v32.6.0 (unreleased)
creation REST API.
https://github.com/nexB/scancode.io/issues/828

- Update the ``fetch_vulnerabilities`` pipe to make the API requests in batches of PURLs.
https://github.com/nexB/scancode.io/issues/835

- Add vulnerability support for discovered dependencies.
The dependency data is loaded using the ``find_vulnerabilities`` pipeline backed by
a VulnerableCode database.
https://github.com/nexB/scancode.io/issues/835

v32.5.0 (2023-08-02)
--------------------

Expand Down
2 changes: 2 additions & 0 deletions scancodeio/context_processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@
from scancode_config import __version__ as scancode_toolkit_version

from scancodeio import __version__ as scancodeio_version
from scancodeio import settings


def versions(request):
    """
    Context processor exposing the ScanCode.io and ScanCode Toolkit versions,
    along with the configured VulnerableCode URL, to the templates.
    """
    context = {}
    # Any leading "v" characters are dropped from the ScanCode.io version string.
    context["SCANCODEIO_VERSION"] = scancodeio_version.lstrip("v")
    context["SCANCODE_TOOLKIT_VERSION"] = scancode_toolkit_version
    context["VULNERABLECODE_URL"] = settings.VULNERABLECODE_URL
    return context
1 change: 1 addition & 0 deletions scanpipe/api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,7 @@ class Meta:
"datafile_path",
"datasource_id",
"package_type",
"affected_by_vulnerabilities",
]


Expand Down
3 changes: 3 additions & 0 deletions scanpipe/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,7 @@ class DependencyFilterSet(FilterSetUtilsMixin, django_filters.FilterSet):
"is_optional",
"is_resolved",
"datasource_id",
"is_vulnerable",
]

search = django_filters.CharFilter(
Expand All @@ -589,6 +590,7 @@ class DependencyFilterSet(FilterSetUtilsMixin, django_filters.FilterSet):
is_runtime = StrictBooleanFilter()
is_optional = StrictBooleanFilter()
is_resolved = StrictBooleanFilter()
is_vulnerable = IsVulnerable(field_name="affected_by_vulnerabilities")

class Meta:
model = DiscoveredDependency
Expand All @@ -607,6 +609,7 @@ class Meta:
"is_optional",
"is_resolved",
"datasource_id",
"is_vulnerable",
]


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Generated by Django 4.2.3 on 2023-08-02 10:43

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the ``affected_by_vulnerabilities`` JSON field to ``discovereddependency``."""

    dependencies = [
        ("scanpipe", "0039_discoveredpackage_compliance_alert_and_more"),
    ]

    operations = [
        # Positional arguments are: model_name, name, field.
        migrations.AddField(
            "discovereddependency",
            "affected_by_vulnerabilities",
            models.JSONField(blank=True, default=list),
        ),
    ]
46 changes: 30 additions & 16 deletions scanpipe/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1025,6 +1025,11 @@ def vulnerable_package_count(self):
"""Return the number of vulnerable packages related to this project."""
return self.discoveredpackages.vulnerable().count()

@cached_property
def vulnerable_dependency_count(self):
    """Count the discovered dependencies of this project flagged as vulnerable."""
    vulnerable_dependencies = self.discovereddependencies.vulnerable()
    return vulnerable_dependencies.count()

@cached_property
def dependency_count(self):
"""Return the number of dependencies related to this project."""
Expand Down Expand Up @@ -2245,11 +2250,31 @@ def __str__(self):
return f"{self.from_resource.pk} > {self.to_resource.pk} using {self.map_type}"


class DiscoveredPackageQuerySet(PackageURLQuerySetMixin, ProjectRelatedQuerySet):
class VulnerabilityMixin(models.Model):
    """Abstract model mixin providing the vulnerability field and helpers."""

    # Vulnerability data stored as JSON; a new instance starts with an empty list.
    affected_by_vulnerabilities = models.JSONField(default=list, blank=True)

    class Meta:
        abstract = True

    @property
    def is_vulnerable(self):
        """Return True when ``affected_by_vulnerabilities`` holds a non-empty value."""
        if self.affected_by_vulnerabilities:
            return True
        return False


class VulnerabilityQuerySetMixin:
    """QuerySet mixin adding a filter on the ``affected_by_vulnerabilities`` field."""

    def vulnerable(self):
        """Keep only the objects whose vulnerability field is not an empty value."""
        has_no_vulnerabilities = Q(affected_by_vulnerabilities__in=EMPTY_VALUES)
        return self.filter(~has_no_vulnerabilities)


class DiscoveredPackageQuerySet(
    VulnerabilityQuerySetMixin,
    PackageURLQuerySetMixin,
    ProjectRelatedQuerySet,
):
    """QuerySet for discovered packages; all behavior comes from its base classes."""


class AbstractPackage(models.Model):
"""These fields should be kept in line with `packagedcode.models.PackageData`."""

Expand Down Expand Up @@ -2446,20 +2471,6 @@ class Meta:
abstract = True


class VulnerabilityMixin(models.Model):
    """Abstract model mixin providing the vulnerability field and helpers."""

    # Vulnerability data stored as JSON; a new instance starts with an empty list.
    affected_by_vulnerabilities = models.JSONField(default=list, blank=True)

    class Meta:
        abstract = True

    @property
    def is_vulnerable(self):
        """Return True when ``affected_by_vulnerabilities`` holds a non-empty value."""
        if self.affected_by_vulnerabilities:
            return True
        return False

class DiscoveredPackage(
ProjectRelatedModel,
ExtraDataFieldMixin,
Expand Down Expand Up @@ -2746,7 +2757,9 @@ def as_cyclonedx(self):
)


class DiscoveredDependencyQuerySet(PackageURLQuerySetMixin, ProjectRelatedQuerySet):
class DiscoveredDependencyQuerySet(
PackageURLQuerySetMixin, VulnerabilityQuerySetMixin, ProjectRelatedQuerySet
):
def prefetch_for_serializer(self):
"""
Optimized prefetching for a QuerySet to be consumed by the
Expand All @@ -2767,6 +2780,7 @@ class DiscoveredDependency(
ProjectRelatedModel,
SaveProjectErrorMixin,
UpdateFromDataMixin,
VulnerabilityMixin,
PackageURLMixin,
):
"""
Expand Down
16 changes: 11 additions & 5 deletions scanpipe/pipelines/find_vulnerabilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,17 @@

class FindVulnerabilities(Pipeline):
"""
Find vulnerabilities for discovered packages in the VulnerableCode database.
Find vulnerabilities for packages and dependencies in the VulnerableCode database.
Vulnerability data is stored on each package instance.
Vulnerability data is stored on each package and dependency instance.
"""

@classmethod
def steps(cls):
    """
    Return the pipeline steps, in execution order: the service availability
    check, then the packages lookup, then the dependencies lookup.
    """
    # The former single ``lookup_vulnerabilities`` step was split in two and
    # must not be referenced here anymore.
    return (
        cls.check_vulnerablecode_service_availability,
        cls.lookup_packages_vulnerabilities,
        cls.lookup_dependencies_vulnerabilities,
    )

def check_vulnerablecode_service_availability(self):
Expand All @@ -46,7 +47,12 @@ def check_vulnerablecode_service_availability(self):
if not vulnerablecode.is_available():
raise Exception("VulnerableCode is not available.")

def lookup_packages_vulnerabilities(self):
    """Check for vulnerabilities for each of the project's discovered packages."""
    packages = self.project.discoveredpackages.all()
    # A single lookup call; the pipeline log method receives the summary message.
    vulnerablecode.fetch_vulnerabilities(packages, logger=self.log)

def lookup_dependencies_vulnerabilities(self):
    """Look up vulnerabilities for the project's resolved discovered dependencies."""
    project_dependencies = self.project.discovereddependencies
    # Only the dependencies flagged as resolved are sent to the lookup.
    resolved = project_dependencies.filter(is_resolved=True)
    vulnerablecode.fetch_vulnerabilities(resolved, logger=self.log)
67 changes: 40 additions & 27 deletions scanpipe/pipes/vulnerablecode.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,22 @@ def is_available():
return response.status_code == requests.codes.ok


def get_purls(packages):
def chunked(iterable, chunk_size):
    """
    Break an `iterable` into lists of `chunk_size` length.
    The last list may be shorter. Any iterable is supported, including
    generators, since neither ``len()`` nor slicing is required.

    Raise a ``ValueError`` when `chunk_size` is lower than 1.

    >>> list(chunked([1, 2, 3, 4, 5], 2))
    [[1, 2], [3, 4], [5]]
    >>> list(chunked([1, 2, 3, 4, 5], 3))
    [[1, 2, 3], [4, 5]]
    >>> list(chunked(iter("abc"), 2))
    [['a', 'b'], ['c']]
    """
    from itertools import islice

    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer.")

    iterator = iter(iterable)
    # An empty chunk means the iterator is exhausted.
    while chunk := list(islice(iterator, chunk_size)):
        yield chunk


def get_purls(packages):
    """Collect the ``package_url`` of each of the `packages`, skipping empty ones."""
    purls = []
    for package in packages:
        purl = package.package_url
        if purl:
            purls.append(purl)
    return purls


Expand Down Expand Up @@ -168,6 +179,7 @@ def bulk_search_by_purl(

data = {
"purls": purls,
"vulnerabilities_only": True,
}

logger.debug(f"VulnerableCode: url={url} purls_count={len(purls)}")
Expand All @@ -190,32 +202,33 @@ def bulk_search_by_cpes(
return request_post(url, data, timeout)


def fetch_vulnerabilities(packages, chunk_size=1000, logger=logger.info):
    """
    Fetch and store vulnerabilities for each provided ``packages``.
    The PURLs are used for the lookups in batches of ``chunk_size`` per request.

    The ``logger`` callable receives a single summary message once the objects
    have been updated. Nothing is saved nor logged when no vulnerability data
    was found for the ``packages``.
    """
    vulnerabilities_by_purl = {}

    for purls_batch in chunked(get_purls(packages), chunk_size):
        # NOTE(review): presumably the request helper may return an empty value
        # when the request fails; treat that as "no results" - confirm.
        response_data = bulk_search_by_purl(purls_batch) or []
        for vulnerability_data in response_data:
            vulnerabilities_by_purl[vulnerability_data["purl"]] = vulnerability_data

    unsaved_objects = []
    for package in packages:
        package_data = vulnerabilities_by_purl.get(package.package_url)
        if not package_data:
            continue
        affected_by = package_data.get("affected_by_vulnerabilities")
        if not affected_by:
            continue
        # Only set in memory here, the database write is done in bulk below.
        package.affected_by_vulnerabilities = affected_by
        unsaved_objects.append(package)

    if not unsaved_objects:
        return

    # All objects are assumed to share the model class of the first one.
    model_class = unsaved_objects[0].__class__
    model_class.objects.bulk_update(
        objs=unsaved_objects,
        fields=["affected_by_vulnerabilities"],
        batch_size=1000,
    )
    logger(
        f"{len(unsaved_objects)} {model_class._meta.verbose_name_plural} updated "
        f"with vulnerability data."
    )
5 changes: 5 additions & 0 deletions scanpipe/templates/scanpipe/dependency_list.html
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@
<tr class="break-word">
<td style="min-width: 300px;" title="{{ dependency.dependency_uid }}">
<a href="{{ dependency.get_absolute_url }}">{{ dependency.purl }}</a>
{% if dependency.is_vulnerable %}
<a href="{{ dependency.get_absolute_url }}#vulnerabilities">
<i class="fa-solid fa-bug fa-sm has-text-danger" title="Vulnerabilities"></i>
</a>
{% endif %}
</td>
<td>
<a href="?type={{ dependency.type }}" class="is-black-link">{{ dependency.type }}</a>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,17 @@
<div class="level-item has-text-centered">
<div>
<p class="heading">Dependencies</p>
<p class="{{ title_class }}">
<p class="{{ title_class }} is-flex is-align-items-center is-justify-content-center">
{% if project.dependency_count %}
<a href="{% url 'project_dependencies' project.slug %}">
{{ project.dependency_count|intcomma }}
</a>
{% if project.vulnerable_dependency_count %}
<a href="{% url 'project_dependencies' project.slug %}?is_vulnerable=yes" class="has-text-danger is-size-5 ml-2">
{{ project.vulnerable_dependency_count|intcomma }}
<i class="fa-solid fa-bug is-size-6"></i>
</a>
{% endif %}
{% else %}
<span>0</span>
{% endif %}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
{% for vulnerability in tab_data.fields.affected_by_vulnerabilities.value %}
<tr>
<td>
<a href="{{ vulnerablecode_url }}vulnerabilities/{{ vulnerability.vulnerability_id }}" target="_blank">
<a href="{{ VULNERABLECODE_URL }}vulnerabilities/{{ vulnerability.vulnerability_id }}" target="_blank">
{{ vulnerability.vulnerability_id }}
<i class="fa-solid fa-up-right-from-square is-small"></i>
</a>
Expand Down
12 changes: 8 additions & 4 deletions scanpipe/tests/data/asgiref-3.3.0_load_inventory_expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,8 @@
"for_package_uid": "pkg:pypi/[email protected]?uuid=fixed-uid-done-for-testing-5642512d1758",
"datafile_path": "asgiref-3.3.0-py3-none-any.whl",
"datasource_id": "pypi_wheel",
"package_type": "pypi"
"package_type": "pypi",
"affected_by_vulnerabilities": []
},
{
"purl": "pkg:pypi/pytest",
Expand All @@ -264,7 +265,8 @@
"for_package_uid": "pkg:pypi/[email protected]?uuid=fixed-uid-done-for-testing-5642512d1758",
"datafile_path": "asgiref-3.3.0-py3-none-any.whl-extract/asgiref-3.3.0.dist-info/METADATA",
"datasource_id": "pypi_wheel_metadata",
"package_type": "pypi"
"package_type": "pypi",
"affected_by_vulnerabilities": []
},
{
"purl": "pkg:pypi/pytest-asyncio",
Expand All @@ -277,7 +279,8 @@
"for_package_uid": "pkg:pypi/[email protected]?uuid=fixed-uid-done-for-testing-5642512d1758",
"datafile_path": "asgiref-3.3.0-py3-none-any.whl",
"datasource_id": "pypi_wheel",
"package_type": "pypi"
"package_type": "pypi",
"affected_by_vulnerabilities": []
},
{
"purl": "pkg:pypi/pytest-asyncio",
Expand All @@ -290,7 +293,8 @@
"for_package_uid": "pkg:pypi/[email protected]?uuid=fixed-uid-done-for-testing-5642512d1758",
"datafile_path": "asgiref-3.3.0-py3-none-any.whl-extract/asgiref-3.3.0.dist-info/METADATA",
"datasource_id": "pypi_wheel_metadata",
"package_type": "pypi"
"package_type": "pypi",
"affected_by_vulnerabilities": []
}
],
"files": [
Expand Down
Loading

0 comments on commit bda3a70

Please sign in to comment.