Skip to content

Commit

Permalink
🚜 Address bandit issues (openvinotoolkit#1152)
Browse files Browse the repository at this point in the history
* Fix metadata path

* Address bandit issues

* Address codacy issues

* Changed the bandit configuration file

* Address PR comments

* bandit fix.
  • Loading branch information
samet-akcay authored Jul 20, 2023
1 parent 2083c51 commit 9323985
Show file tree
Hide file tree
Showing 8 changed files with 107 additions and 60 deletions.
18 changes: 12 additions & 6 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,29 +15,29 @@ repos:

# python code formatting
- repo: https://github.com/psf/black
rev: 23.1.0
rev: 23.3.0
hooks:
- id: black

# Ruff version.
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: "v0.0.253"
rev: "v0.0.275"
hooks:
- id: ruff
exclude: "tests"
args: ["--fix"]

# python static type checking
- repo: https://github.com/pre-commit/mirrors-mypy
rev: "v1.0.1"
rev: "v1.4.1"
hooks:
- id: mypy
additional_dependencies: [types-PyYAML]
exclude: "tests"

# notebooks.
- repo: https://github.com/nbQA-dev/nbQA
rev: 1.6.3
rev: 1.7.0
hooks:
- id: nbqa-black
- id: nbqa-ruff
Expand All @@ -46,12 +46,12 @@ repos:
args: ["--ignore=I001"]

- repo: https://github.com/pre-commit/mirrors-prettier
rev: v3.0.0-alpha.4
rev: v3.0.0-alpha.9-for-vscode
hooks:
- id: prettier

- repo: https://github.com/igorshubovych/markdownlint-cli
rev: v0.33.0
rev: v0.35.0
hooks:
- id: markdownlint

Expand All @@ -62,3 +62,9 @@ repos:
name: Lint Dockerfiles
description: Runs hadolint to lint Dockerfiles
args: ["--ignore", "DL3008"]
- repo: https://github.com/PyCQA/bandit
rev: 1.7.5
hooks:
- id: bandit
args: ["-c", ".ci/ipas_default.config"]
additional_dependencies: ["bandit"]
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ source = [
".tox/*/site-packages",
]


# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# NBQA CONFIGURATION #
[tool.nbqa.addopts]
Expand Down
4 changes: 2 additions & 2 deletions src/anomalib/data/utils/augmenter.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,8 @@ def generate_perturbation(
perlin_scale = 6
min_perlin_scale = 0

perlin_scalex = 2 ** random.randint(min_perlin_scale, perlin_scale)
perlin_scaley = 2 ** random.randint(min_perlin_scale, perlin_scale)
perlin_scalex = 2 ** random.randint(min_perlin_scale, perlin_scale) # nosec: B311
perlin_scaley = 2 ** random.randint(min_perlin_scale, perlin_scale) # nosec: B311

perlin_noise = random_2d_perlin((nextpow2(height), nextpow2(width)), (perlin_scalex, perlin_scaley))[
:height, :width
Expand Down
67 changes: 46 additions & 21 deletions src/anomalib/data/utils/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@
import io
import logging
import os
import re
import tarfile
from dataclasses import dataclass
from pathlib import Path
from tarfile import TarError, TarFile
from tarfile import TarFile, TarInfo
from typing import Iterable
from urllib.request import urlretrieve
from zipfile import ZipFile
Expand Down Expand Up @@ -205,6 +206,37 @@ def update_to(self, chunk_number: int = 1, max_chunk_size: int = 1, total_size=N
self.update(chunk_number * max_chunk_size - self.n)


def is_file_potentially_dangerous(file_name: str) -> bool:
    """Check if an archive member name is potentially dangerous to extract.

    A name is flagged when extracting it could write outside of the
    destination directory or into a sensitive system location:

    - it is an absolute path (POSIX root, Windows drive letter or
      backslash-rooted path),
    - it contains a ``..`` path component (path traversal),
    - it contains one of the known sensitive directories (``/etc/``, ``/root/``).

    Args:
        file_name (str): Name of the archive member, as stored in the archive.

    Returns:
        bool: True if the member is potentially dangerous, False otherwise.
    """
    # Archives created on Windows may use backslashes as path separators.
    normalized = file_name.replace("\\", "/")

    # Absolute paths ignore the extraction root entirely.
    if normalized.startswith("/") or re.match(r"[A-Za-z]:", normalized):
        return True

    # A ".." component lets a member climb out of the extraction root.
    # Only whole components count, so names such as "a..b" stay allowed.
    if ".." in normalized.split("/"):
        return True

    # Some example criteria. We could expand this.
    unsafe_patterns = ("/etc/", "/root/")
    return any(pattern in normalized for pattern in unsafe_patterns)


def safe_extract(tar_file: TarFile, root: Path, members: list[TarInfo]) -> None:
    """Extract the given members of a tar archive, refusing path traversal.

    Every member is checked to resolve inside ``root`` before anything is
    written, so a malicious archive cannot leave a partially extracted
    dataset behind or write outside of the destination directory.

    Args:
        tar_file (TarFile): TarFile object.
        root (Path): Root directory where the dataset will be stored.
        members (List[TarInfo]): List of members to be extracted.

    Raises:
        tarfile.TarError: If a member would be extracted outside of ``root``.
    """
    root_dir = Path(root).resolve()

    # Validate all members up front; absolute names and ".." components both
    # resolve to a location that does not have ``root_dir`` as an ancestor.
    for member in members:
        target = (root_dir / member.name).resolve()
        if target != root_dir and root_dir not in target.parents:
            raise tarfile.TarError("Attempted Path Traversal in Tar File")

    # Extraction filters are only available on newer Python versions (and
    # security backports). Use the "data" filter when present so that links
    # and special files are vetted by the standard library as well.
    extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}
    for member in members:
        tar_file.extract(member, root, **extract_kwargs)


def hash_check(file_path: Path, expected_hash: str) -> None:
"""Raise assert error if hash does not match the calculated hash of the file.
Expand All @@ -214,7 +246,7 @@ def hash_check(file_path: Path, expected_hash: str) -> None:
"""
with file_path.open("rb") as hash_file:
assert (
hashlib.md5(hash_file.read()).hexdigest() == expected_hash
hashlib.new(name="md5", data=hash_file.read(), usedforsecurity=False).hexdigest() == expected_hash
), f"Downloaded file {file_path} does not match the required hash."


Expand All @@ -227,17 +259,26 @@ def extract(file_name: Path, root: Path) -> None:
"""
logger.info("Extracting dataset into root folder.")

# Safely extract zip files
if file_name.suffix == ".zip":
with ZipFile(file_name, "r") as zip_file:
zip_file.extractall(root)
for file_info in zip_file.infolist():
if not is_file_potentially_dangerous(file_info.filename):
zip_file.extract(file_info, root)

# Safely extract tar files.
elif file_name.suffix in (".tar", ".gz", ".xz", ".tgz"):
with tarfile.open(file_name) as tar_file:
safe_extract(tar_file, root)
members = tar_file.getmembers()
safe_members = [member for member in members if not is_file_potentially_dangerous(member.name)]
safe_extract(tar_file, root, safe_members)

else:
raise ValueError(f"Unrecognized file format: {file_name}")

logger.info("Cleaning up files.")
(file_name).unlink()
file_name.unlink()


def download_and_extract(root: Path, info: DownloadInfo) -> None:
Expand Down Expand Up @@ -286,19 +327,3 @@ def is_within_directory(directory: Path, target: Path):
# TODO: replace with pathlib is_relative_to after switching to Python 3.10
prefix = os.path.commonprefix([abs_directory, abs_target])
return prefix == str(abs_directory)


def safe_extract(tar_file: TarFile, path: str | Path = "."):
"""Extract a tar file safely by first checking for attempted path traversal.
Args:
tar_file (TarFile): Tar file to be extracted
path (str | Path): path in which the extracted files will be placed
"""
path = Path(path)
for member in tar_file.getmembers():
member_path = path / member.name
if not is_within_directory(path, member_path):
raise TarError("Attempted Path Traversal in Tar File")

tar_file.extractall(path)
67 changes: 41 additions & 26 deletions src/anomalib/models/ai_vad/clip/clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,20 @@
# SPDX-License-Identifier: Apache-2.0

import hashlib
import logging
import os
import urllib
import warnings
from typing import List, Union
from urllib.parse import urlparse

import requests
import torch
from PIL import Image
from pkg_resources import packaging
from torchvision.transforms import CenterCrop, Compose, Normalize, Resize, ToTensor
from tqdm import tqdm

logger = logging.getLogger(__name__)
from .model import build_model

try:
Expand Down Expand Up @@ -50,36 +53,48 @@
}


def _verify_checksum(file_path: str, url: str) -> bool:
expected_sha256 = url.split("/")[-2]
sha256_hash = hashlib.sha256()

with open(file_path, "rb") as file:
for chunk in iter(lambda: file.read(4096), b""):
sha256_hash.update(chunk)

file_hash = sha256_hash.hexdigest()

return file_hash == expected_sha256


def _download(url: str, root: str):
os.makedirs(root, exist_ok=True)
filename = os.path.basename(url)

expected_sha256 = url.split("/")[-2]
filename = os.path.basename(urlparse(url).path)
download_target = os.path.join(root, filename)

if os.path.exists(download_target) and not os.path.isfile(download_target):
raise RuntimeError(f"{download_target} exists and is not a regular file")

if os.path.isfile(download_target):
if hashlib.sha256(open(download_target, "rb").read()).hexdigest() == expected_sha256:
if os.path.exists(download_target):
if not os.path.isfile(download_target):
raise FileExistsError(f"{download_target} exists and is not a regular file")
if _verify_checksum(download_target, url):
return download_target
else:
warnings.warn(f"{download_target} exists, but the SHA256 checksum does not match; re-downloading the file")

with urllib.request.urlopen(url) as source, open(download_target, "wb") as output:
with tqdm(
total=int(source.info().get("Content-Length")), ncols=80, unit="iB", unit_scale=True, unit_divisor=1024
) as loop:
while True:
buffer = source.read(8192)
if not buffer:
break

output.write(buffer)
loop.update(len(buffer))

if hashlib.sha256(open(download_target, "rb").read()).hexdigest() != expected_sha256:
raise RuntimeError("Model has been downloaded but the SHA256 checksum does not not match")

logger.warning("%s exists, but the checksum does not match; re-downloading the file", download_target)
os.remove(download_target)

response = requests.get(url, stream=True, timeout=10.0) # Timeout is for bandit security linter
response.raise_for_status()

total_size = int(response.headers.get("Content-Length", 0))

with open(download_target, "wb") as file, tqdm(
total=total_size, ncols=80, unit="iB", unit_scale=True, unit_divisor=1024
) as loop:
for chunk in response.iter_content(chunk_size=8192):
if chunk:
file.write(chunk)
loop.update(len(chunk))

if not _verify_checksum(download_target, url):
raise RuntimeError("Model has been downloaded but the checksum does not match")

return download_target

Expand Down
4 changes: 2 additions & 2 deletions src/anomalib/models/efficient_ad/torch_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,8 +278,8 @@ def choose_random_aug_image(self, image: Tensor) -> Tensor:
transforms.functional.adjust_saturation,
]
# Sample an augmentation coefficient λ from the uniform distribution U(0.8, 1.2)
coefficient = random.uniform(0.8, 1.2)
transform_function = random.choice(transform_functions)
coefficient = random.uniform(0.8, 1.2) # nosec: B311
transform_function = random.choice(transform_functions) # nosec: B311
return transform_function(image, coefficient)

def forward(self, batch: Tensor, batch_imagenet: Tensor = None) -> Tensor | dict:
Expand Down
4 changes: 2 additions & 2 deletions tests/pre_merge/utils/metrics/test_adaptive_threshold.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ def test_manual_threshold():
config.metrics.image = ["F1Score"]
config.metrics.pixel = ["F1Score"]

image_threshold = random.random()
pixel_threshold = random.random()
image_threshold = random.random() # nosec: B311
pixel_threshold = random.random() # nosec: B311
config.metrics.threshold.manual_image = image_threshold
config.metrics.threshold.manual_pixel = pixel_threshold

Expand Down
2 changes: 1 addition & 1 deletion tools/benchmarking/utils/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def get_unique_key(str_len: int) -> str:
Returns:
str: Random string
"""
return "".join([random.choice(string.ascii_lowercase) for _ in range(str_len)])
return "".join([random.choice(string.ascii_lowercase) for _ in range(str_len)]) # nosec: B311


def upload_to_wandb(
Expand Down

0 comments on commit 9323985

Please sign in to comment.