Skip to content

Commit

Permalink
Allow using GitHub URLs as targets
Browse files Browse the repository at this point in the history
The CLI now accepts targets that start with https://github.com.
When it encounters a GitHub URL, it will:
* Download a zip ball for the repo
* Extract the zip file into a temporary directory
* Delete the zip file
* Run analysis on the files in that temp directory

Signed-off-by: Eric Brown <[email protected]>
  • Loading branch information
ericwb committed Feb 1, 2024
1 parent 5ee3771 commit ded8728
Show file tree
Hide file tree
Showing 7 changed files with 142 additions and 19 deletions.
106 changes: 95 additions & 11 deletions precli/cli/main.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
# Copyright 2023 Secure Saurce LLC
# Copyright 2024 Secure Saurce LLC
import argparse
import io
import logging
import os
import pathlib
import sys
import tempfile
import traceback
import zipfile
from urllib.parse import urljoin
from urllib.parse import urlparse

import requests
from ignorelib import IgnoreFilterManager
from rich import progress

Expand All @@ -33,6 +38,7 @@ def _init_logger(log_level=logging.INFO):
LOG.handlers = []
logging.captureWarnings(True)
LOG.setLevel(log_level)
logging.getLogger("urllib3").setLevel(log_level)
handler = logging.StreamHandler(sys.stderr)
LOG.addHandler(handler)
LOG.debug("logging initialized")
Expand Down Expand Up @@ -124,30 +130,95 @@ def build_ignore_mgr(path: str, ignore_file: str) -> IgnoreFilterManager:
)


def get_owner_repo(repo_url: str) -> tuple:
    """
    Extract the owner and repository name from a GitHub URL.

    Only the first two path segments are used, so a URL that points
    deeper into a repository (``.../owner/repo/tree/main``) still
    resolves to that repository.

    :param str repo_url: URL such as ``https://github.com/owner/repo``
    :return: ``(owner, repo)``; a component missing from the URL is
        returned as ``None`` instead of raising ``IndexError``
    :rtype: tuple
    """
    segments = urlparse(repo_url).path.strip("/").split("/")

    # "".split("/") yields [""], so an empty path gives an empty owner.
    owner = segments[0] or None
    repo = segments[1] if len(segments) > 1 else None
    if repo is not None:
        # Clone-style URLs end in ".git", which is not part of the name.
        repo = repo.removesuffix(".git") or None

    return owner, repo


def get_default_branch(owner: str, repo: str, timeout: float = 30.0):
    """
    Look up the default branch of a GitHub repository.

    :param str owner: account or organization that owns the repository
    :param str repo: repository name
    :param float timeout: seconds to wait on the GitHub API before
        giving up
    :return: the ``default_branch`` value of the API response, or
        ``None`` if the response does not contain that key
    :raises requests.exceptions.HTTPError: if the API answers with an
        error status
    :raises requests.exceptions.Timeout: if the API does not answer
        within ``timeout`` seconds
    """
    api_url = f"https://api.github.com/repos/{owner}/{repo}"
    # requests applies no timeout by default; without one an unresponsive
    # server would block the CLI indefinitely.
    response = requests.get(api_url, timeout=timeout)
    response.raise_for_status()
    return response.json().get("default_branch")


def extract_github_repo(
    owner: str, repo: str, branch: str, timeout: float = 30.0
):
    """
    Download a branch of a GitHub repository and unpack it locally.

    The zipball is streamed into a fresh temporary directory, extracted
    there, and the archive itself is then deleted. On success the
    directory is left in place for the caller to analyze; it is not
    removed by this function.

    :param str owner: account or organization that owns the repository
    :param str repo: repository name
    :param str branch: branch to download
    :param float timeout: seconds to wait on the GitHub API before
        giving up
    :return: path of the extracted ``<owner>-<repo>-*`` directory if one
        exists, otherwise the temporary directory itself
    :rtype: str
    :raises requests.exceptions.HTTPError: if the download answers with
        an error status
    """
    import shutil

    base_url = "https://api.github.com/repos"
    api_url = f"{base_url}/{owner}/{repo}/zipball/{branch}"
    temp_dir = tempfile.mkdtemp()
    zip_path = os.path.join(temp_dir, f"{repo}.zip")

    try:
        # Stream to disk in chunks so the archive is never held in memory.
        with requests.get(api_url, stream=True, timeout=timeout) as r:
            r.raise_for_status()
            with open(zip_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)

        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(temp_dir)

        os.remove(zip_path)
    except Exception:
        # Nothing usable was produced; do not leave a partial download
        # or extraction behind in the temp area.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise

    # Return the first matching directory. Continuing the search after a
    # match would join later names onto the already-selected directory.
    for path in os.listdir(temp_dir):
        if path.startswith(f"{owner}-{repo}-"):
            return os.path.join(temp_dir, path)

    return temp_dir


def file_to_url(owner, repo, branch, target, root, file):
    """
    Map a file inside an extracted repository to its GitHub blob URL.

    :param str owner: account or organization that owns the repository
    :param str repo: repository name
    :param str branch: branch the files were downloaded from
    :param str target: local directory the repository was extracted to
    :param str root: local directory containing ``file``; expected to be
        ``target`` itself or a directory below it
    :param str file: file name within ``root``
    :return: URL of the form
        ``https://github.com/<owner>/<repo>/blob/<branch>/<path>``
    :rtype: str
    """
    from urllib.parse import quote

    # Path of root relative to target, using URL separators regardless
    # of the local OS path separator.
    rel_dir = root[len(target):].replace(os.sep, "/").strip("/")

    segments = [owner, repo, "blob", branch]
    if rel_dir:
        # Files at the repository root have no directory part; adding an
        # empty segment would put "//" in the URL.
        segments.append(rel_dir)
    segments.append(file)

    # Percent-encode characters such as spaces or "#" that would
    # otherwise break the URL or be read as a fragment.
    rel_path = quote("/".join(segments), safe="/")
    return urljoin("https://github.com", rel_path)


def discover_files(targets: list[str], recursive: bool):
    """
    Build the list of files to analyze from the given targets.

    A target may be a file, a directory, or a URL starting with
    ``https://github.com``. A GitHub URL is downloaded and extracted to
    a temporary directory, which is then treated like any other
    directory target. Directory contents are filtered through the
    ``.gitignore`` and ``.preignore`` managers.

    :param list targets: files, directories or GitHub URLs
    :param bool recursive: descend into subdirectories of directory
        targets when ``True``
    :return: ``(file_list, file_map)`` where ``file_list`` holds the
        paths to analyze and ``file_map`` maps each path that came from
        a GitHub target to its GitHub URL
    :rtype: tuple
    """
    file_list = []
    file_map = {}

    for target in targets:
        # Reset per target so values from an earlier GitHub target can
        # never leak into a later one.
        owner = None
        repo = None
        branch = None

        if target.startswith("https://github.com"):
            owner, repo = get_owner_repo(target)
            if repo:
                try:
                    branch = get_default_branch(owner, repo)
                    target = extract_github_repo(owner, repo, branch)
                except requests.exceptions.RequestException as err:
                    # Covers connection failures as well as HTTP error
                    # statuses and timeouts. The target is left as the
                    # URL and falls through to the plain-file branch.
                    LOG.warning("Unable to fetch %s: %s", target, err)
                    owner = None
                    repo = None

        if os.path.isdir(target):
            gitignore_mgr = build_ignore_mgr(target, ".gitignore")
            preignore_mgr = build_ignore_mgr(target, ".preignore")

            if recursive is True:
                for root, _, files in gitignore_mgr.walk():
                    for file in files:
                        if not preignore_mgr.is_ignored(file):
                            path = os.path.join(root, file)
                            file_list.append(path)
                            if repo:
                                file_map[path] = file_to_url(
                                    owner, repo, branch, target, root, file
                                )
            else:
                files = os.listdir(path=target)
                for file in files:
                    if not (
                        gitignore_mgr.is_ignored(file)
                        or preignore_mgr.is_ignored(file)
                    ):
                        path = os.path.join(target, file)
                        file_list.append(path)
                        if repo:
                            # Top-level files live directly in target,
                            # so target doubles as the root here.
                            file_map[path] = file_to_url(
                                owner, repo, branch, target, target, file
                            )
        else:
            file_list.append(target)

    return file_list, file_map


def run_checks(parsers: dict, file_list: list[str]) -> list[Result]:
Expand Down Expand Up @@ -258,10 +329,23 @@ def main():
parsers = loader.load_parsers(enabled, disabled)

# Compile a list of the targets
file_list = discover_files(args.targets, args.recursive)
file_list, file_map = discover_files(args.targets, args.recursive)

results, metrics = run_checks(parsers, file_list)

# Set the location url in the result if original target was URL based
for result in results:
net_loc = file_map.get(result.location.file_name)
if net_loc is not None:
if result.location.start_line != result.location.end_line:
lines = (
f"L{result.location.start_line}-"
f"L{result.location.end_line}"
)
else:
lines = f"L{result.location.start_line}"
result.location.url = f"{net_loc}#{lines}"

if args.json is True:
json = Json(args.no_color)
json.render(results, metrics)
Expand Down
24 changes: 23 additions & 1 deletion precli/core/location.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
# Copyright 2023 Secure Saurce LLC
# Copyright 2024 Secure Saurce LLC
from tree_sitter import Node


class Location:
def __init__(
self,
file_name: str = None,
url: str = None,
node: Node = None,
start_line: int = 0,
end_line: int = -1,
start_column: int = 1,
end_column: int = -1,
):
self._file_name = file_name
self._url = url
if node is not None:
self._start_line = node.start_point[0] + 1
self._start_column = node.start_point[1]
Expand All @@ -35,6 +37,26 @@ def file_name(self) -> str:
"""
return self._file_name

@property
def url(self) -> str:
    """
    Network address of the file, for targets that were given as a URL.

    :return: URL stored on this location, or ``None`` if none was set
    :rtype: str
    """
    return self._url

@url.setter
def url(self, url: str):
    """
    Record the network address of the file.

    :param str url: file network location
    """
    self._url = url

@property
def start_line(self) -> int:
"""
Expand Down
2 changes: 1 addition & 1 deletion precli/renderers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2023 Secure Saurce LLC
# Copyright 2024 Secure Saurce LLC
from abc import ABC
from abc import abstractmethod

Expand Down
9 changes: 7 additions & 2 deletions precli/renderers/detailed.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2023 Secure Saurce LLC
# Copyright 2024 Secure Saurce LLC
import linecache

from rich import box
Expand Down Expand Up @@ -36,9 +36,14 @@ def render(self, results: list[Result], metrics: Metrics):
emoji = ":information-emoji: "
style = "blue"

# Show the remote URL when the result has one; otherwise show the local
# path. The fallback must be an assignment: a bare expression leaves
# file_name unbound (or stale from a previous result) when url is None.
if result.location.url is not None:
    file_name = result.location.url
else:
    file_name = result.location.file_name

self.console.print(
f"{emoji} {result.level.name.title()} on line "
f"{result.location.start_line} in {result.location.file_name}",
f"{result.location.start_line} in {file_name}",
style=style,
markup=False,
)
Expand Down
9 changes: 7 additions & 2 deletions precli/renderers/json.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2023 Secure Saurce LLC
# Copyright 2024 Secure Saurce LLC
import json

from rich import console
Expand All @@ -19,13 +19,18 @@ def render(self, results: list[Result], metrics: Metrics):
for result in results:
rule = Rule.get_by_id(result.rule_id)

# Report the remote URL when the result has one; otherwise report the
# local path. The fallback must be an assignment: a bare expression
# leaves file_name unbound (or stale from a previous result) when url
# is None.
if result.location.url is not None:
    file_name = result.location.url
else:
    file_name = result.location.file_name

results_json["results"].append(
{
"rule_id": rule.id,
"rule_name": rule.name,
"cwe_id": rule.cwe.cwe_id,
"severity": result.level.name,
"file_name": result.location.file_name,
"file_name": file_name,
"start_line": result.location.start_line,
"end_line": result.location.end_line,
"start_column": result.location.start_column,
Expand Down
10 changes: 8 additions & 2 deletions precli/renderers/plain.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2023 Secure Saurce LLC
# Copyright 2024 Secure Saurce LLC
import linecache

from rich import console
Expand Down Expand Up @@ -36,9 +36,15 @@ def render(self, results: list[Result], metrics: Metrics):
self.console.print(
f"{rule.id}: {rule.cwe.name}",
)

# Print the remote URL when the result has one; otherwise print the
# local path. The fallback must be an assignment: a bare expression
# leaves file_name unbound (or stale from a previous result) when url
# is None.
if result.location.url is not None:
    file_name = result.location.url
else:
    file_name = result.location.file_name

# TODO(ericwb): replace hardcoded <module> with actual scope
self.console.print(
f' File "{result.location.file_name}", line '
f' File "{file_name}", line '
f"{result.location.start_line}, in <module>",
)
code_line = linecache.getline(
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ rich # MIT
tree_sitter>=0.20.4
tree-sitter-languages>=1.9.1
ignorelib
requests

0 comments on commit ded8728

Please sign in to comment.