diff --git a/docker/backend.dockerfile b/docker/backend.dockerfile index 85721a6..00f07f1 100644 --- a/docker/backend.dockerfile +++ b/docker/backend.dockerfile @@ -6,9 +6,9 @@ ARG OPENJPEG_VERSION=2.4.0 ARG PIMS_REVISION ARG PIMS_VERSION ARG PLUGIN_CSV=scripts/plugin-list.csv -ARG PY_VERSION=3.8 +ARG PY_VERSION=3.10 ARG SETUPTOOLS_VERSION=59.6.0 -ARG UBUNTU_VERSION=20.04 +ARG UBUNTU_VERSION=22.04 ARG VIPS_URL=https://github.com/libvips/libvips/releases/download ARG VIPS_VERSION=8.12.1 @@ -23,7 +23,7 @@ FROM ubuntu:${UBUNTU_VERSION} ENV LANG C.UTF-8 ENV DEBIAN_FRONTEND noninteractive -ARG PY_VERSION=3.8 +ARG PY_VERSION=3.10 RUN apt-get -y update && apt-get -y install --no-install-recommends --no-install-suggests \ `# Essentials` \ @@ -167,7 +167,7 @@ ARG PIMS_PACKAGE_REVISION ARG PIMS_PACKAGE_VERSION ARG PIMS_VERSION ARG PLUGIN_CSV=scripts/plugin-list.csv -ARG PY_VERSION=3.8 +ARG PY_VERSION=3.10 ARG SETUPTOOLS_VERSION=59.6.0 ARG UBUNTU_VERSION=20.04 ARG VIPS_URL=https://github.com/libvips/libvips/releases/download diff --git a/pims-config.env b/pims-config.env index cf71129..990d078 100644 --- a/pims-config.env +++ b/pims-config.env @@ -4,6 +4,8 @@ WRITING_PATH="/data/pims/tmp" DEFAULT_IMAGE_SIZE_SAFETY_MODE="SAFE_REJECT" DEFAULT_ANNOTATION_ORIGIN="LEFT_TOP" OUTPUT_SIZE_LIMIT="10000" +CRYPT4GH_PUBLIC_KEY="" +CRYPT4GH_PRIVATE_KEY="" CYTOMINE_PUBLIC_KEY="" CYTOMINE_PRIVATE_KEY="" PIMS_URL="http://localhost-ims" \ No newline at end of file diff --git a/pims-dev-config.env b/pims-dev-config.env index 31bef23..2f57f53 100644 --- a/pims-dev-config.env +++ b/pims-dev-config.env @@ -4,6 +4,8 @@ WRITING_PATH="/data/pims/tmp" DEFAULT_IMAGE_SIZE_SAFETY_MODE=SAFE_REJECT DEFAULT_ANNOTATION_ORIGIN=LEFT_TOP OUTPUT_SIZE_LIMIT=10000 +CRYPT4GH_PUBLIC_KEY="abc" +CRYPT4GH_PRIVATE_KEY="def" CYTOMINE_PUBLIC_KEY=imageServerPublicKey CYTOMINE_PRIVATE_KEY=imageServerPrivateKey PIMS_URL=http://ims.cytomine.local diff --git a/pims/api/operations.py b/pims/api/operations.py index 
a600b4c..ae48065 100644 --- a/pims/api/operations.py +++ b/pims/api/operations.py @@ -11,35 +11,47 @@ # * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # * See the License for the specific language governing permissions and # * limitations under the License. + import logging import os import traceback from typing import Optional -import aiofiles +import aiofiles from cytomine import Cytomine -from cytomine.models import ( - Project, ProjectCollection, Storage, UploadedFile +from cytomine.models import Project, ProjectCollection, Storage, UploadedFile +from fastapi import APIRouter, BackgroundTasks, Depends, Query +from starlette.formparsers import ( + MultiPartMessage, + MultiPartParser, + _user_safe_decode, ) -from fastapi import APIRouter, BackgroundTasks, Depends, File, Form, Query, UploadFile from starlette.requests import Request from starlette.responses import FileResponse, JSONResponse -from starlette.formparsers import MultiPartMessage, MultiPartParser, _user_safe_decode from pims.api.exceptions import ( - AuthenticationException, BadRequestException, CytomineProblem, - check_representation_existence + AuthenticationException, + BadRequestException, + CytomineProblem, + NotFoundException, + check_representation_existence, ) from pims.api.utils.cytomine_auth import ( - get_this_image_server, parse_authorization_header, - parse_request_token, sign_token + get_this_image_server, + parse_authorization_header, + parse_request_token, + sign_token, +) +from pims.api.utils.parameter import ( + filepath_parameter, + imagepath_parameter, + sanitize_filename, ) -from pims.api.utils.parameter import filepath_parameter, imagepath_parameter, sanitize_filename from pims.api.utils.response import serialize_cytomine_model from pims.config import Settings, get_settings from pims.files.archive import make_zip_archive from pims.files.file import Path -from pims.importer.importer import run_import +from pims.importer.importer import run_import, 
REQUIRED_DIRECTORIES = ["images", "metadata"]


def is_dataset_structured(dataset_path: str) -> bool:
    """Return whether `dataset_path` has the expected dataset layout.

    A dataset is considered structured when every name listed in
    `REQUIRED_DIRECTORIES` exists as a directory directly under
    `dataset_path`.
    """
    return all(
        os.path.isdir(os.path.join(dataset_path, directory))
        for directory in REQUIRED_DIRECTORIES
    )


@router.post("/import", tags=["Import"])
def import_dataset(
    request: Request,
    host: str = Query(..., description="The Cytomine host"),
    path: str = Query(..., description="The absolute path to the datasets to import"),
    storage_id: int = Query(..., description="The storage where to import the dataset"),
    config: Settings = Depends(get_settings)
) -> JSONResponse:
    """Import every structured dataset found directly under `path`.

    Each entry of `path` that passes `is_dataset_structured` is handed to
    `run_import_from_path`. The request is authenticated by re-signing the
    request token with the caller's private key (fetched from Cytomine with
    the PIMS credentials) and comparing it with the provided signature.

    Raises:
        BadRequestException: `storage_id` is falsy.
        NotFoundException: `path` is not an existing directory.
        AuthenticationException: PIMS or the caller cannot be authenticated.
        CytomineProblem: the storage cannot be fetched.

    Returns:
        A JSON response `{"status": "ok"}` once all datasets were processed.
    """
    if not storage_id:
        raise BadRequestException(detail="'storage' parameter is missing.")

    # isdir (not exists): a path pointing at a regular file would otherwise
    # reach os.listdir and fail with an unhandled NotADirectoryError.
    if not os.path.isdir(path):
        raise NotFoundException(detail="The provided dataset path does not exist.")

    # NOTE(review): the filesystem is inspected before the caller is
    # authenticated, so the 404 above is observable by anonymous clients.
    candidates = (os.path.join(path, entry) for entry in os.listdir(path))
    datasets = [
        candidate
        for candidate in candidates
        if is_dataset_structured(candidate)
    ]

    public_key, signature = parse_authorization_header(request.headers)
    cytomine_auth = (host, config.cytomine_public_key, config.cytomine_private_key)

    with Cytomine(*cytomine_auth, configure_logging=False) as c:
        if not c.current_user:
            raise AuthenticationException("PIMS authentication to Cytomine failed.")

        this = get_this_image_server(config.pims_url)
        cyto_keys = c.get(f"userkey/{public_key}/keys.json")
        # Presumably the client returns a falsy value when the key lookup
        # fails (TODO confirm); reject instead of crashing on the subscript.
        if not cyto_keys:
            raise AuthenticationException("Authentication to Cytomine failed")
        private_key = cyto_keys["privateKey"]

        if sign_token(private_key, parse_request_token(request)) != signature:
            raise AuthenticationException("Authentication to Cytomine failed")

        # From here on, act on behalf of the calling user.
        c.set_credentials(public_key, private_key)
        user = c.current_user

        storage = Storage().fetch(storage_id)
        if not storage:
            raise CytomineProblem(f"Storage {storage_id} not found")

        for dataset in datasets:
            run_import_from_path(
                dataset,
                cytomine_auth,
                storage_id,
                this.id,
                user.id,
            )

        return JSONResponse(content={"status": "ok"})
def cached_dcmread(format: AbstractFormat) -> FileDataset:
    """Return the pydicom dataset for `format`, going through its cache.

    The lookup uses the cache key '_dcmread' and `_pydicom_dcmread` as the
    loader, called with the resolved file path and `force=True`.
    NOTE(review): presumably `get_cached` only invokes the loader on a cache
    miss and forwards the extra arguments to it — confirm.
    """
    resolved_path = format.path.resolve()
    dataset = format.get_cached(
        '_dcmread', _pydicom_dcmread, resolved_path, force=True
    )
    return dataset
"public_key": settings.crypt4gh_public_key, + "private_key": settings.crypt4gh_private_key, + } + return cls.checker_class.match(cached_path) return False diff --git a/pims/importer/importer.py b/pims/importer/importer.py index 5c8db43..4977296 100644 --- a/pims/importer/importer.py +++ b/pims/importer/importer.py @@ -11,47 +11,60 @@ # * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # * See the License for the specific language governing permissions and # * limitations under the License. + import logging import os import shutil -from typing import List, Optional -import aiofiles +from typing import List, Optional, Tuple from celery import group, signature from celery.result import allow_join_result -from fastapi import Request, UploadFile # noqa +from cytomine.models import ProjectCollection, UploadedFile from pims.api.exceptions import ( - BadRequestException, FilepathNotFoundProblem, - NoMatchingFormatProblem + BadRequestException, + FilepathNotFoundProblem, + NoMatchingFormatProblem, ) from pims.api.utils.models import HistogramType from pims.config import get_settings from pims.files.archive import Archive, ArchiveError from pims.files.file import ( - EXTRACTED_DIR, HISTOGRAM_STEM, ORIGINAL_STEM, PROCESSED_DIR, Path, - SPATIAL_STEM, UPLOAD_DIR_PREFIX + EXTRACTED_DIR, + HISTOGRAM_STEM, + ORIGINAL_STEM, + PROCESSED_DIR, + SPATIAL_STEM, + UPLOAD_DIR_PREFIX, + Path, ) from pims.files.histogram import Histogram from pims.files.image import Image from pims.formats import AbstractFormat from pims.formats.utils.factories import ( ImportableFormatFactory, - SpatialReadableFormatFactory + SpatialReadableFormatFactory, ) from pims.importer.listeners import ( - CytomineListener, ImportEventType, ImportListener, - StdoutListener + CytomineListener, + ImportEventType, + ImportListener, + StdoutListener, ) from pims.processing.histograms.utils import build_histogram_file -from pims.tasks.queue import BG_TASK_MAPPING, CELERY_TASK_MAPPING, Task, 
func_from_str +from pims.tasks.queue import ( + BG_TASK_MAPPING, + CELERY_TASK_MAPPING, + Task, + func_from_str, +) from pims.utils.strings import unique_name_generator log = logging.getLogger("pims.app") +FILE_ROOT_PATH = Path(get_settings().root) PENDING_PATH = Path(get_settings().pending_path) WRITING_PATH = Path(get_settings().writing_path) -FILE_ROOT_PATH = Path(get_settings().root) class FileErrorProblem(BadRequestException): @@ -492,11 +505,96 @@ def _sequential_imports(): _sequential_imports() return imported + + def import_from_path(self): + """Import a file from a given path.""" + + try: + self.notify(ImportEventType.START_DATA_EXTRACTION, self.pending_file) + + upload_dir_name = Path( + f"{UPLOAD_DIR_PREFIX}" + f"{str(unique_name_generator())}" + ) + self.upload_dir = FILE_ROOT_PATH / upload_dir_name + self.mkdir(self.upload_dir) + + if self.pending_name: + name = self.pending_name + else: + name = self.pending_file.name + self.upload_path = self.upload_dir / name + + self.mksymlink(self.upload_path, self.pending_file) + + self.notify( + ImportEventType.MOVED_PENDING_FILE, + self.pending_file, + self.upload_path, + ) + self.notify(ImportEventType.END_DATA_EXTRACTION, self.upload_path) + + self.notify(ImportEventType.START_FORMAT_DETECTION, self.upload_path) + + format_factory = ImportableFormatFactory() + format = format_factory.match(self.upload_path) + + if format is None: + self.notify(ImportEventType.ERROR_NO_FORMAT, self.upload_path) + raise NoMatchingFormatProblem(self.upload_path) + self.notify( + ImportEventType.END_FORMAT_DETECTION, + self.upload_path, format + ) + + self.processed_dir = self.upload_dir / Path(PROCESSED_DIR) + self.mkdir(self.processed_dir) + + original_filename = Path(f"{ORIGINAL_STEM}.{format.get_identifier()}") + self.original_path = self.processed_dir / original_filename + + self.mksymlink(self.original_path, self.upload_path) + assert self.original_path.has_original_role() + + 
def get_folder_size(folder_path) -> int:
    """Get the total size in bytes of all files below `folder_path`.

    The tree is walked recursively with `os.walk`; only regular directory
    entries reported as files contribute to the total.
    """
    total_size = 0
    for dirpath, _, filenames in os.walk(folder_path):
        total_size += sum(
            os.path.getsize(os.path.join(dirpath, filename))
            for filename in filenames
        )

    return total_size


def run_import_from_path(
    dataset_path: str,
    cytomine_auth: Tuple[str, str, str],
    storage_id: int,
    image_server_id: int,
    user_id: int,
) -> None:
    """Run the importer on every image directory of a dataset.

    Each sub-directory of `<dataset_path>/images` is registered as an
    `UploadedFile` (status `UPLOADED`) and imported in place through
    `FileImporter.import_from_path`. Entries that are not directories are
    ignored.

    Args:
        dataset_path: Root directory of the dataset.
        cytomine_auth: Credentials tuple passed to the `CytomineListener`.
        storage_id: Identifier of the target storage.
        image_server_id: Identifier of this image server.
        user_id: Identifier of the user owning the uploaded files.
    """
    images_path = Path(os.path.join(dataset_path, "images"))
    for item in images_path.iterdir():
        if not item.is_dir():
            continue

        # iterdir() already yields paths prefixed with images_path; joining
        # them onto images_path again duplicated the prefix whenever
        # dataset_path was relative.
        image_path = os.fspath(item)

        uf = UploadedFile(
            original_filename=item.name,
            filename=image_path,
            size=get_folder_size(image_path),
            ext="",
            content_type="",
            id_storage=storage_id,
            id_user=user_id,
            id_image_server=image_server_id,
            status=UploadedFile.UPLOADED,
        )

        listeners = [
            StdoutListener(item.name),
            CytomineListener(
                cytomine_auth,
                uf,
                projects=ProjectCollection(),
                user_properties=iter([]),
            ),
        ]

        fi = FileImporter(Path(image_path), item.name, listeners)
        fi.import_from_path()
REQUIRED = [ + 'cryptography==43.0.1', 'uvicorn[standard]>=0.13.4', 'asgiref>=3.4.1', 'fastapi>=0.65.1,<=0.68.2', @@ -56,7 +57,7 @@ 'imagecodecs>=2021.3.31', 'scikit-image>=0.18', 'zarr>=2.8.3', - 'pydicom>=2.2.2', + 'pydicom>=3.0.0', 'python-gdcm>=3.0.10', 'python-dateutil>=2.7.0', diff --git a/tests/pims-config.env b/tests/pims-config.env index b60fe84..69a1cce 100644 --- a/tests/pims-config.env +++ b/tests/pims-config.env @@ -3,6 +3,8 @@ PENDING_PATH="/tmp/uploaded" DEFAULT_IMAGE_SIZE_SAFETY_MODE="SAFE_REJECT" DEFAULT_ANNOTATION_ORIGIN="LEFT_TOP" OUTPUT_SIZE_LIMIT="10000" +CRYPT4GH_PUBLIC_KEY="abc" +CRYPT4GH_PRIVATE_KEY="def" CYTOMINE_PUBLIC_KEY="123" CYTOMINE_PRIVATE_KEY="456" PIMS_URL="http://localhost-ims" \ No newline at end of file