From f0d7249dd027ec60c7eb5717883b6addcdb9577e Mon Sep 17 00:00:00 2001 From: Sean Sullivan Date: Wed, 7 Feb 2024 11:06:22 -0500 Subject: [PATCH 1/4] Update fast api and hash index to shorten filepath names --- elastic_datashader/cache.py | 34 +++++++++++++++++++++++++++----- elastic_datashader/parameters.py | 2 +- pyproject.toml | 2 +- 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/elastic_datashader/cache.py b/elastic_datashader/cache.py index b5b8ab4..43e5e32 100644 --- a/elastic_datashader/cache.py +++ b/elastic_datashader/cache.py @@ -2,6 +2,7 @@ from collections import OrderedDict from datetime import datetime, timedelta, timezone from os import scandir +from hashlib import sha256 import os from contextlib import suppress from pathlib import Path @@ -23,14 +24,37 @@ def path_age(now: datetime, path: Path) -> timedelta: return now - path_dt +index_hash_map = {} + +def get_index_hash(idx: str) -> str: + ''' + Calculates a hash value for the specific index set + On some OS's the pathname becomes too long and causes errors when + creating files if multiple CCS indexes have been explicitly defined + *:my-data-* listed as + mysite-1:my-data-*,mysite-2:my-data-*,mysite-3:my-data-*,mysite-4:my-data-*,mysite-5:my-data-* + ''' + idx_hash = index_hash_map.get(idx,None) + if idx_hash is not None: + return idx_hash + idx_hash = sha256() + idx_hash.update(str(idx).encode("utf-8")) + idx_hash = idx_hash.hexdigest()[0:20] + index_hash_map[idx] = idx_hash + return idx_hash + def tile_name(idx, x, y, z, parameter_hash) -> str: - return f"{idx}/{parameter_hash}/{z}/{x}/{y}.png" + idx_hash = get_index_hash(idx) + return f"{idx_hash}/{parameter_hash}/{z}/{x}/{y}.png" def rendering_tile_name(idx, x, y, z, parameter_hash) -> str: - return f"{idx}/{parameter_hash}/{z}/{x}/{y}.rendering" + idx_hash = get_index_hash(idx) + + return f"{idx_hash}/{parameter_hash}/{z}/{x}/{y}.rendering" def tile_id(idx, x, y, z, parameter_hash) -> str: - return f"{idx}_{parameter_hash}_{z}_{x}_{y}" + idx_hash = get_index_hash(idx) + return f"{idx_hash}_{parameter_hash}_{z}_{x}_{y}" def directory_size(path: Path) -> int: ''' @@ -134,14 +158,14 @@ def release_cache_placeholder(cache_path: Path, tile: str) -> None: if tile_path.exists(): tile_path.unlink(missing_ok=True) -def check_cache_dir(cache_path: Path, layer_name: str) -> None: +def check_cache_dir(cache_path: Path, idx: str) -> None: """ Ensure the folder ``cache_path``/``layer_name`` exists :param cache_path: Top level directory :param layer_name: Specific layer in cache """ - tile_cache_path = cache_path / layer_name + tile_cache_path = cache_path / get_index_hash(idx) tile_cache_path.mkdir(parents=True, exist_ok=True) def clear_hash_cache(cache_path: Path, idx_name: str, param_hash: Optional[str]) -> None: diff --git a/elastic_datashader/parameters.py b/elastic_datashader/parameters.py index 9bb82c3..183096a 100644 --- a/elastic_datashader/parameters.py +++ b/elastic_datashader/parameters.py @@ -267,7 +267,7 @@ def get_parameter_hash(params: Dict[str, Any]) -> str: p = p.isoformat() parameter_hash.update(str(p).encode("utf-8")) - return parameter_hash.hexdigest() + return parameter_hash.hexdigest()[0:30] def extract_parameters(headers: Dict[Any, Any], query_params: Dict[Any, Any]) -> Tuple[str, Dict[str, Any]]: """ diff --git a/pyproject.toml b/pyproject.toml index 387a309..f61d11a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ numpy = "^1.23" PyYAML = "*" humanize = "*" uvicorn = {extras = ["standard"], version = "0.24.0", optional = true} -fastapi = "^0.96" +fastapi = ">=0.109.1" georgio = "2023.156.924" jinja2 = "3.1.2" From 09a102d2a6d3d10b90f3d77cdb9dbb97c0bcedb1 Mon Sep 17 00:00:00 2001 From: Sean Sullivan Date: Wed, 7 Feb 2024 11:09:47 -0500 Subject: [PATCH 2/4] Fix lint --- elastic_datashader/cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elastic_datashader/cache.py b/elastic_datashader/cache.py index 43e5e32..54acd90 100644 --- a/elastic_datashader/cache.py +++ b/elastic_datashader/cache.py @@ -34,7 +34,7 @@ def get_index_hash(idx: str) -> str: *:my-data-* listed as mysite-1:my-data-*,mysite-2:my-data-*,mysite-3:my-data-*,mysite-4:my-data-*,mysite-5:my-data-* ''' - idx_hash = index_hash_map.get(idx,None) + idx_hash = index_hash_map.get(idx, None) if idx_hash is not None: return idx_hash idx_hash = sha256() From ad5f987d56b1eebb2405c9497498ddfb2299cd31 Mon Sep 17 00:00:00 2001 From: Sean Sullivan Date: Wed, 7 Feb 2024 13:19:12 -0500 Subject: [PATCH 3/4] Update tests --- tests/test_cache.py | 8 ++++---- tests/test_parameters.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/test_cache.py b/tests/test_cache.py index 6ce33f8..774cb7e 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -26,11 +26,11 @@ def test_du(tmp_path): def test_tile_name(): - assert cache.tile_name("abc", 1, 2, 3, "somehash") == "abc/somehash/3/1/2.png" + assert cache.tile_name("abc", 1, 2, 3, "somehash") == "ba7816bf8f01cfea4141/somehash/3/1/2.png" def test_tile_id(): - assert cache.tile_id("abc", 1, 2, 3, "somehash") == "abc_somehash_3_1_2" + assert cache.tile_id("abc", 1, 2, 3, "somehash") == "ba7816bf8f01cfea4141_somehash_3_1_2" def test_get_cache_none(): @@ -57,8 +57,8 @@ def test_set_cache(tmp_path): def test_check_cache_dir(tmp_path): - cache.check_cache_dir(tmp_path, "foo") - assert (tmp_path / "foo").exists() + cache.check_cache_dir(tmp_path, "abc") + assert (tmp_path / "ba7816bf8f01cfea4141").exists() def test_clear_hash_cache(tmp_path): diff --git a/tests/test_parameters.py b/tests/test_parameters.py index 5a811c1..871b50e 100644 --- a/tests/test_parameters.py +++ b/tests/test_parameters.py @@ -118,10 +118,10 @@ def test_get_category_field(): assert parameters.get_category_field(None) is None assert parameters.get_category_field("banana") == "banana" -def test_get_parameter_hash(): - assert parameters.get_parameter_hash({"foo": "bar", "baz": 1}) == "a6488297eb1cdaa23e196800b1c399b13d9a5c86cfecaf8f9fd0fbe9af7533db" - assert parameters.get_parameter_hash({"foo": "bar", "baz": 1, "abc": datetime(2022, 2, 17, 11, 0, 0, tzinfo=timezone.utc)}) == "88ade56886a8099e6fd3c25525a0fb9ab3938be64374569480ed3bfd7f3d70e9" - assert parameters.get_parameter_hash({}) == "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" +def test_get_parameter_hash():a6488297eb1cdaa23e196800b1c399 + assert parameters.get_parameter_hash({"foo": "bar", "baz": 1}) == "a6488297eb1cdaa23e196800b1c399" + assert parameters.get_parameter_hash({"foo": "bar", "baz": 1, "abc": datetime(2022, 2, 17, 11, 0, 0, tzinfo=timezone.utc)}) == "88ade56886a8099e6fd3c25525a0fb" + assert parameters.get_parameter_hash({}) == "e3b0c44298fc1c149afbf4c8996fb9" def test_get_time_bounds_already_quantized(): now = datetime(2022, 6, 14, 12, 15, 0, tzinfo=timezone.utc) From 3bf7868aa970ac0c1e1054956771d7a1e3726ece Mon Sep 17 00:00:00 2001 From: Sean Sullivan Date: Wed, 7 Feb 2024 13:21:42 -0500 Subject: [PATCH 4/4] . --- tests/test_parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_parameters.py b/tests/test_parameters.py index 871b50e..2f734c1 100644 --- a/tests/test_parameters.py +++ b/tests/test_parameters.py @@ -118,7 +118,7 @@ def test_get_category_field(): assert parameters.get_category_field(None) is None assert parameters.get_category_field("banana") == "banana" -def test_get_parameter_hash():a6488297eb1cdaa23e196800b1c399 +def test_get_parameter_hash(): assert parameters.get_parameter_hash({"foo": "bar", "baz": 1}) == "a6488297eb1cdaa23e196800b1c399" assert parameters.get_parameter_hash({"foo": "bar", "baz": 1, "abc": datetime(2022, 2, 17, 11, 0, 0, tzinfo=timezone.utc)}) == "88ade56886a8099e6fd3c25525a0fb" assert parameters.get_parameter_hash({}) == "e3b0c44298fc1c149afbf4c8996fb9"