Skip to content

Commit

Permalink
Merge pull request #279 from juliendoutre/julien.doutre/npm-direct-ur…
Browse files Browse the repository at this point in the history
…l-dependency

[SINT-1536] Add new NPM metadata detector to catch dependencies fetched from URLs
  • Loading branch information
christophetd authored Oct 3, 2023
2 parents 83ca3cb + 5a95438 commit 5290371
Show file tree
Hide file tree
Showing 7 changed files with 154 additions and 15 deletions.
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ guarddog pypi scan /tmp/triage.tar.gz

# Scan a local directory, the packages need to be located in the root directory
# For instance you have several pypi packages in ./samples/ like:
# ./samples/package1.tar.gz ./samples/package2.zip ./samples/package3.whl
# ./samples/package1.tar.gz ./samples/package2.zip ./samples/package3.whl
# FYI if a file not supported by guarddog is found you will get an error
# Here is the command to scan a directory:
guarddog pypi scan ./samples/
Expand Down Expand Up @@ -88,6 +88,7 @@ Source code heuristics:
|:-------------:|:---------------:|
| shady-links | Identify when a package contains an URL to a domain with a suspicious extension |
| obfuscation | Identify when a package uses a common obfuscation method often used by malware |
| clipboard-access | Identify when a package reads or write data from the clipboard |
| exfiltrate-sensitive-data | Identify when a package reads and exfiltrates sensitive data from the local system |
| download-executable | Identify when a package downloads and makes executable a remote binary |
| exec-base64 | Identify when a package dynamically executes base64-encoded code |
Expand Down Expand Up @@ -128,6 +129,7 @@ Metadata heuristics:
| release_zero | Identify packages with an release version that's 0.0 or 0.0.0 |
| potentially_compromised_email_domain | Identify when a package maintainer e-mail domain (and therefore package manager account) might have been compromised |
| typosquatting | Identify packages that are named closely to an highly popular package |
| direct_url_dependency | Identify packages with direct URL dependencies. Dependencies fetched this way are not immutable and can be used to inject untrusted code or reduce the likelihood of a reproducible install. |


<!-- END_RULE_LIST -->
Expand Down Expand Up @@ -230,12 +232,12 @@ flake8 guarddog --count --max-line-length=120 --statistics --exclude tests/analy

## Acknowledgments

Authors:
Authors:
* [Ellen Wang](https://www.linkedin.com/in/ellen-wang-4bb5961a0/)
* [Christophe Tafani-Dereeper](https://github.com/christophetd)
* [Vladimir de Turckheim](https://www.linkedin.com/in/vladimirdeturckheim/)

Inspiration:
Inspiration:
* [Backstabber’s Knife Collection: A Review of Open Source Software Supply Chain Attacks](https://arxiv.org/pdf/2005.09535)
* [What are Weak Links in the npm Supply Chain?](https://arxiv.org/pdf/2112.10165.pdf)
* [A Survey on Common Threats in npm and PyPi Registries](https://arxiv.org/pdf/2108.09576.pdf)
Expand Down
11 changes: 8 additions & 3 deletions guarddog/analyzer/metadata/npm/__init__.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
from guarddog.analyzer.metadata.npm.empty_information import NPMEmptyInfoDetector
from guarddog.analyzer.metadata.npm.potentially_compromised_email_domain import \
NPMPotentiallyCompromisedEmailDomainDetector
from guarddog.analyzer.metadata.npm.potentially_compromised_email_domain import (
NPMPotentiallyCompromisedEmailDomainDetector,
)
from guarddog.analyzer.metadata.npm.release_zero import NPMReleaseZeroDetector
from guarddog.analyzer.metadata.npm.typosquatting import NPMTyposquatDetector
from guarddog.analyzer.metadata.npm.direct_url_dependency import (
NPMDirectURLDependencyDetector,
)

NPM_METADATA_RULES = {}

classes = [
NPMEmptyInfoDetector,
NPMReleaseZeroDetector,
NPMPotentiallyCompromisedEmailDomainDetector,
NPMTyposquatDetector
NPMTyposquatDetector,
NPMDirectURLDependencyDetector,
]

for detectorClass in classes:
Expand Down
66 changes: 66 additions & 0 deletions guarddog/analyzer/metadata/npm/direct_url_dependency.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
""" Direct URL Dependency Detector
Detects if a package depends on direct URL dependencies
"""
from typing import Optional
import re

from guarddog.analyzer.metadata.detector import Detector

from urllib.parse import urlparse


github_project_pattern = re.compile(r"^([\w\-\.]+)/([\w\-\.]+)")


class NPMDirectURLDependencyDetector(Detector):
"""This heuristic detects packages with direct URL dependencies.
Dependencies fetched this way are not immutable and can be used to inject untrusted code
or reduce the likelihood of a reproducible install."""

def __init__(self):
super().__init__(
name="direct_url_dependency",
description="Identify packages with direct URL dependencies. \
Dependencies fetched this way are not immutable and can be used to \
inject untrusted code or reduce the likelihood of a reproducible install.",
)

def detect(
self,
package_info,
path: Optional[str] = None,
name: Optional[str] = None,
version: Optional[str] = None,
) -> tuple[bool, str]:
findings = []

for dep_name, dep_version in (
package_info.get("versions", {})
.get(version, {})
.get("dependencies", {})
.items()
):
# According to npm documentation, HTTP(s) and Git are accepted URL schemes when specifying dependencies:
# https://docs.npmjs.com/cli/v10/configuring-npm/package-json#urls-as-dependencies
# https://docs.npmjs.com/cli/v10/configuring-npm/package-json#git-urls-as-dependencies
if urlparse(dep_version).scheme in [
"http",
"https",
"git",
"git+ssh",
"git+http",
"git+https",
"git+file",
]:
findings.append(
f"Dependency {dep_name} refers to a direct Git or HTTP URL {dep_version}."
)
# According to npm documentation, Github repositories are accepted when specifying dependencies:
# https://docs.npmjs.com/cli/v10/configuring-npm/package-json#github-urls
elif github_project_pattern.match(dep_version):
findings.append(
f"Dependency {dep_name} refers to a direct Github repository {dep_version}."
)

return len(findings) != 0, "\n".join(findings)
45 changes: 39 additions & 6 deletions guarddog/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
ALL_RULES = \
set(get_metadata_detectors(ECOSYSTEM.NPM).keys()) \
| set(get_metadata_detectors(ECOSYSTEM.PYPI).keys()) | SEMGREP_RULE_NAMES
NPM_RULES = set(get_metadata_detectors(ECOSYSTEM.NPM).keys()) | SEMGREP_RULE_NAMES
PYPI_RULES = set(get_metadata_detectors(ECOSYSTEM.PYPI).keys()) | SEMGREP_RULE_NAMES
EXIT_CODE_ISSUES_FOUND = 1

AVAILABLE_LOG_LEVELS = {
Expand All @@ -39,9 +41,25 @@
def common_options(fn):
fn = click.option("--exit-non-zero-on-finding", default=False, is_flag=True,
help="Exit with a non-zero status code if at least one issue is identified")(fn)
fn = click.argument("target")(fn)
return fn


def legacy_rules_options(fn):
fn = click.option("-r", "--rules", multiple=True, type=click.Choice(ALL_RULES, case_sensitive=False))(fn)
fn = click.option("-x", "--exclude-rules", multiple=True, type=click.Choice(ALL_RULES, case_sensitive=False))(fn)
fn = click.argument("target")(fn)
return fn


def npm_options(fn):
fn = click.option("-r", "--rules", multiple=True, type=click.Choice(NPM_RULES, case_sensitive=False))(fn)
fn = click.option("-x", "--exclude-rules", multiple=True, type=click.Choice(NPM_RULES, case_sensitive=False))(fn)
return fn


def pypi_options(fn):
fn = click.option("-r", "--rules", multiple=True, type=click.Choice(PYPI_RULES, case_sensitive=False))(fn)
fn = click.option("-x", "--exclude-rules", multiple=True, type=click.Choice(PYPI_RULES, case_sensitive=False))(fn)
return fn


Expand Down Expand Up @@ -82,15 +100,24 @@ def cli(log_level):
pass


def _get_rule_pram(rules, exclude_rules):
def _get_rule_param(rules, exclude_rules, ecosystem):
rule_param = None
if len(rules) > 0:
rule_param = rules

if len(exclude_rules) > 0:
rule_param = ALL_RULES - set(exclude_rules)
all_rules = SEMGREP_RULE_NAMES
if ecosystem == ECOSYSTEM.NPM:
all_rules |= set(get_metadata_detectors(ECOSYSTEM.NPM).keys())
elif ecosystem == ECOSYSTEM.PYPI:
all_rules |= set(get_metadata_detectors(ECOSYSTEM.PYPI).keys())

rule_param = all_rules - set(exclude_rules)

if len(rules) > 0:
print("--rules and --exclude-rules cannot be used together")
exit(1)

return rule_param


Expand All @@ -101,7 +128,7 @@ def _verify(path, rules, exclude_rules, output_format, exit_non_zero_on_finding,
path (str): path to requirements.txt file
"""
return_value = None
rule_param = _get_rule_pram(rules, exclude_rules)
rule_param = _get_rule_param(rules, exclude_rules, ecosystem)
scanner = get_scanner(ecosystem, True)
if scanner is None:
sys.stderr.write(f"Command verify is not supported for ecosystem {ecosystem}")
Expand All @@ -122,7 +149,7 @@ def display_result(result: dict) -> None:
return_value = js.dumps(results)

if output_format == "sarif":
return_value = report_verify_sarif(path, list(ALL_RULES), results, ecosystem)
return_value = report_verify_sarif(path, list(rule_param), results, ecosystem)

if output_format is not None:
print(return_value)
Expand Down Expand Up @@ -160,7 +187,7 @@ def _scan(identifier, version, rules, exclude_rules, output_format, exit_non_zer
rules (list[str]): specific rules to run, defaults to all
"""

rule_param = _get_rule_pram(rules, exclude_rules)
rule_param = _get_rule_param(rules, exclude_rules, ecosystem)
scanner = cast(Optional[PackageScanner], get_scanner(ecosystem, False))
if scanner is None:
sys.stderr.write(f"Command scan is not supported for ecosystem {ecosystem}")
Expand Down Expand Up @@ -233,6 +260,7 @@ def pypi(**kwargs):
@npm.command("scan")
@common_options
@scan_options
@npm_options
def scan_npm(target, version, rules, exclude_rules, output_format, exit_non_zero_on_finding):
""" Scan a given npm package
"""
Expand All @@ -242,6 +270,7 @@ def scan_npm(target, version, rules, exclude_rules, output_format, exit_non_zero
@npm.command("verify")
@common_options
@verify_options
@npm_options
def verify_npm(target, rules, exclude_rules, output_format, exit_non_zero_on_finding):
""" Verify a given npm project
"""
Expand All @@ -251,6 +280,7 @@ def verify_npm(target, rules, exclude_rules, output_format, exit_non_zero_on_fin
@pypi.command("scan")
@common_options
@scan_options
@pypi_options
def scan_pypi(target, version, rules, exclude_rules, output_format, exit_non_zero_on_finding):
""" Scan a given PyPI package
"""
Expand All @@ -260,6 +290,7 @@ def scan_pypi(target, version, rules, exclude_rules, output_format, exit_non_zer
@pypi.command("verify")
@common_options
@verify_options
@pypi_options
def verify_pypi(target, rules, exclude_rules, output_format, exit_non_zero_on_finding):
""" Verify a given Pypi project
"""
Expand All @@ -283,13 +314,15 @@ def list_rules_npm():
@cli.command("verify", deprecated=True)
@common_options
@verify_options
@legacy_rules_options
def verify(target, rules, exclude_rules, output_format, exit_non_zero_on_finding):
return _verify(target, rules, exclude_rules, output_format, exit_non_zero_on_finding, ECOSYSTEM.PYPI)


@cli.command("scan", deprecated=True)
@common_options
@scan_options
@legacy_rules_options
def scan(target, version, rules, exclude_rules, output_format, exit_non_zero_on_finding):
return _scan(target, version, rules, exclude_rules, output_format, exit_non_zero_on_finding, ECOSYSTEM.PYPI)

Expand Down
33 changes: 33 additions & 0 deletions tests/analyzer/metadata/test_direct_url_dependency.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from copy import deepcopy

import pytest

from guarddog.analyzer.metadata.npm.direct_url_dependency import (
NPMDirectURLDependencyDetector,
)
from tests.analyzer.metadata.resources.sample_project_info import NPM_PACKAGE_INFO


class TestDirectURLDependency:
npm_detector = NPMDirectURLDependencyDetector()

test_data = [
("2.0.1", False),
("http://asdf.com/asdf.tar.gz", True),
("git+ssh://[email protected]:npm/cli.git#v1.0.27", True),
("git+ssh://[email protected]:npm/cli#semver:^5.0", True),
("git+https://[email protected]/npm/cli.git", True),
("git://github.com/npm/cli.git#v1.0.27", True),
("expressjs/express", True),
("mochajs/mocha#4727d357ea", True),
("user/repo#feature\/branch", True),
]

@pytest.mark.parametrize("version,expected_matches", test_data)
def test_npm_direct_url_dependencies(self, version, expected_matches):
package_info = deepcopy(NPM_PACKAGE_INFO)
package_info["versions"]["2.0.0"]["dependencies"]["foo"] = version
matches, _ = self.npm_detector.detect(
package_info, name="", path="", version="2.0.0"
)
assert matches == expected_matches
4 changes: 2 additions & 2 deletions tests/analyzer/sourcecode/code-execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"""


""" OK: Executing python files to extract version information
""" OK: Executing python files to extract version information
"""
# ok: code-execution
exec(open("vapory_git/version.py").read()) # loads __version__
Expand Down Expand Up @@ -134,7 +134,7 @@ def RunCustomCommand(command_list):
subprocess.check_output(["git", "describe", "--tags"]).strip()


""" RULEID: executing bash commands
""" RULEID: executing bash commands
"""
# ruleid: code-execution
os.system('start ' + filename)
Expand Down
2 changes: 1 addition & 1 deletion tests/analyzer/sourcecode/exec-base64.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
- Subprocess module
- os module
- Command module
Inner partitions:
- Muliline executions with intermediate assignments
- Including/excluding module name in function calls
Expand Down

0 comments on commit 5290371

Please sign in to comment.