Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix long file name bug and clear FastAPI regex DoS CVE-2024-24762 #50

Merged
merged 4 commits into from
Feb 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 29 additions & 5 deletions elastic_datashader/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from collections import OrderedDict
from datetime import datetime, timedelta, timezone
from os import scandir
from hashlib import sha256
import os
from contextlib import suppress
from pathlib import Path
Expand All @@ -23,14 +24,37 @@ def path_age(now: datetime, path: Path) -> timedelta:

return now - path_dt

# Memo of index specification -> shortened hash, so repeated calls for the
# same index set skip re-hashing.
index_hash_map = {}


def get_index_hash(idx: str) -> str:
    """
    Return a short, stable hash for an index specification.

    On some OSes the pathname becomes too long and file creation fails
    when several CCS indexes are listed explicitly, e.g. ``*:my-data-*``
    written out as
    ``mysite-1:my-data-*,mysite-2:my-data-*,mysite-3:my-data-*``.
    A fixed-length hash is returned so callers can use it in place of the
    raw index string.

    :param idx: Index name or comma-separated index pattern
    :return: First 20 hex characters of the SHA-256 digest of ``str(idx)``
    """
    # Key the memo on the same string that gets hashed, so a non-string
    # (even unhashable) ``idx`` cannot fail on the dict lookup.
    key = str(idx)
    cached = index_hash_map.get(key)
    if cached is not None:
        return cached

    digest = sha256(key.encode("utf-8")).hexdigest()[:20]
    index_hash_map[key] = digest
    return digest

def tile_name(idx, x, y, z, parameter_hash) -> str:
return f"{idx}/{parameter_hash}/{z}/{x}/{y}.png"
idx_hash = get_index_hash(idx)
return f"{idx_hash}/{parameter_hash}/{z}/{x}/{y}.png"

def rendering_tile_name(idx, x, y, z, parameter_hash) -> str:
return f"{idx}/{parameter_hash}/{z}/{x}/{y}.rendering"
idx_hash = get_index_hash(idx)

return f"{idx_hash}/{parameter_hash}/{z}/{x}/{y}.rendering"

def tile_id(idx, x, y, z, parameter_hash) -> str:
return f"{idx}_{parameter_hash}_{z}_{x}_{y}"
idx_hash = get_index_hash(idx)
return f"{idx_hash}_{parameter_hash}_{z}_{x}_{y}"

def directory_size(path: Path) -> int:
'''
Expand Down Expand Up @@ -134,14 +158,14 @@ def release_cache_placeholder(cache_path: Path, tile: str) -> None:
if tile_path.exists():
tile_path.unlink(missing_ok=True)

def check_cache_dir(cache_path: Path, layer_name: str) -> None:
def check_cache_dir(cache_path: Path, idx: str) -> None:
"""
Ensure the folder ``cache_path``/``<hash of idx>`` exists

:param cache_path: Top level directory
:param idx: Index name or pattern; the folder is named by ``get_index_hash(idx)``
"""
tile_cache_path = cache_path / layer_name
tile_cache_path = cache_path / get_index_hash(idx)
tile_cache_path.mkdir(parents=True, exist_ok=True)

def clear_hash_cache(cache_path: Path, idx_name: str, param_hash: Optional[str]) -> None:
Expand Down
2 changes: 1 addition & 1 deletion elastic_datashader/parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ def get_parameter_hash(params: Dict[str, Any]) -> str:
p = p.isoformat()
parameter_hash.update(str(p).encode("utf-8"))

return parameter_hash.hexdigest()
return parameter_hash.hexdigest()[0:30]

def extract_parameters(headers: Dict[Any, Any], query_params: Dict[Any, Any]) -> Tuple[str, Dict[str, Any]]:
"""
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ numpy = "^1.23"
PyYAML = "*"
humanize = "*"
uvicorn = {extras = ["standard"], version = "0.24.0", optional = true}
fastapi = "^0.96"
fastapi = ">=0.109.1"
georgio = "2023.156.924"
jinja2 = "3.1.2"

Expand Down
8 changes: 4 additions & 4 deletions tests/test_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@ def test_du(tmp_path):


def test_tile_name():
assert cache.tile_name("abc", 1, 2, 3, "somehash") == "abc/somehash/3/1/2.png"
assert cache.tile_name("abc", 1, 2, 3, "somehash") == "ba7816bf8f01cfea4141/somehash/3/1/2.png"


def test_tile_id():
assert cache.tile_id("abc", 1, 2, 3, "somehash") == "abc_somehash_3_1_2"
assert cache.tile_id("abc", 1, 2, 3, "somehash") == "ba7816bf8f01cfea4141_somehash_3_1_2"


def test_get_cache_none():
Expand All @@ -57,8 +57,8 @@ def test_set_cache(tmp_path):


def test_check_cache_dir(tmp_path):
cache.check_cache_dir(tmp_path, "foo")
assert (tmp_path / "foo").exists()
cache.check_cache_dir(tmp_path, "abc")
assert (tmp_path / "ba7816bf8f01cfea4141").exists()


def test_clear_hash_cache(tmp_path):
Expand Down
6 changes: 3 additions & 3 deletions tests/test_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,9 @@ def test_get_category_field():
assert parameters.get_category_field("banana") == "banana"

def test_get_parameter_hash():
assert parameters.get_parameter_hash({"foo": "bar", "baz": 1}) == "a6488297eb1cdaa23e196800b1c399b13d9a5c86cfecaf8f9fd0fbe9af7533db"
assert parameters.get_parameter_hash({"foo": "bar", "baz": 1, "abc": datetime(2022, 2, 17, 11, 0, 0, tzinfo=timezone.utc)}) == "88ade56886a8099e6fd3c25525a0fb9ab3938be64374569480ed3bfd7f3d70e9"
assert parameters.get_parameter_hash({}) == "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
assert parameters.get_parameter_hash({"foo": "bar", "baz": 1}) == "a6488297eb1cdaa23e196800b1c399"
assert parameters.get_parameter_hash({"foo": "bar", "baz": 1, "abc": datetime(2022, 2, 17, 11, 0, 0, tzinfo=timezone.utc)}) == "88ade56886a8099e6fd3c25525a0fb"
assert parameters.get_parameter_hash({}) == "e3b0c44298fc1c149afbf4c8996fb9"

def test_get_time_bounds_already_quantized():
now = datetime(2022, 6, 14, 12, 15, 0, tzinfo=timezone.utc)
Expand Down
Loading