From f097c1135a5741956c95ca33d7e6e0fec59dde9f Mon Sep 17 00:00:00 2001
From: Chen Zhang
Date: Fri, 20 Dec 2024 10:50:18 -0500
Subject: [PATCH] add configuration management for SAMMY execution backends

---
NOTE(review): in this copy of the patch, part of the local.py hunk is missing:
the text of execute_sammy jumps from the "<" of the shell command straight to
"-> None:" of the next method signature. That line is reproduced below exactly
as found; restore the missing lines from commit f097c11 before applying.

 src/pleiades/sammy/backends/local.py | 177 +++++++++++++++++++++++++++
 src/pleiades/sammy/config.py         |  99 +++++++++++++++
 src/pleiades/sammy/interface.py      | 119 +++++++++++-------
 3 files changed, 353 insertions(+), 42 deletions(-)

diff --git a/src/pleiades/sammy/backends/local.py b/src/pleiades/sammy/backends/local.py
index e69de29..3bbd40a 100644
--- a/src/pleiades/sammy/backends/local.py
+++ b/src/pleiades/sammy/backends/local.py
@@ -0,0 +1,177 @@
+#!/usr/bin/env python
+"""Local backend implementation for SAMMY execution."""
+import subprocess
+import textwrap
+from datetime import datetime
+from pathlib import Path
+from uuid import uuid4
+import logging
+from typing import List
+
+from pleiades.sammy.interface import (
+    SammyRunner,
+    SammyFiles,
+    SammyExecutionResult,
+    EnvironmentPreparationError,
+    SammyExecutionError,
+    OutputCollectionError,
+)
+from pleiades.sammy.config import LocalSammyConfig
+
+logger = logging.getLogger(__name__)
+
+# Known SAMMY output file patterns
+SAMMY_OUTPUT_FILES = {
+    'SAMMY.LPT',  # Log file
+    'SAMMIE.ODF',  # Output data file
+    'SAMNDF.PAR',  # Updated parameter file
+    'SAMRESOLVED.PAR'  # Additional parameter file
+}
+
+class LocalSammyRunner(SammyRunner):
+    """Implementation of SAMMY runner for local installation."""
+
+    def __init__(self, config: LocalSammyConfig):
+        super().__init__(config)
+        self.config: LocalSammyConfig = config
+        self._moved_files: List[Path] = []
+
+    def prepare_environment(self, files: SammyFiles) -> None:
+        """Prepare environment for local SAMMY execution."""
+        try:
+            logger.debug("Validating input files")
+            files.validate()
+
+            # No need to validate directories as this is done in config validation
+            logger.debug("Environment preparation complete")
+
+        except Exception as e:
+            raise EnvironmentPreparationError(
+                f"Environment preparation failed: {str(e)}"
+            ) from e
+
+    def execute_sammy(self, files: SammyFiles) -> SammyExecutionResult:
+        """Execute SAMMY using local installation."""
+        execution_id = str(uuid4())
+        start_time = datetime.now()
+
+        logger.info(f"Starting SAMMY execution {execution_id}")
+        logger.debug(f"Working directory: {self.config.working_dir}")
+
+        sammy_command = textwrap.dedent(f"""\
+            {self.config.sammy_executable} < None:
+        """Collect and validate output files after execution."""
+        collection_start = datetime.now()
+        logger.info(f"Collecting outputs for execution {result.execution_id}")
+
+        try:
+            self._moved_files = []  # Reset moved files list
+            found_outputs = set()
+
+            # First check for known output files
+            for known_file in SAMMY_OUTPUT_FILES:
+                output_file = self.config.working_dir / known_file
+                if output_file.is_file():
+                    found_outputs.add(output_file)
+                    logger.debug(f"Found known output file: {known_file}")
+
+            # Then look for any additional SAM* files
+            for output_file in self.config.working_dir.glob("SAM*"):
+                if output_file.is_file() and output_file not in found_outputs:
+                    found_outputs.add(output_file)
+                    logger.debug(f"Found additional output file: {output_file.name}")
+
+            if not found_outputs:
+                logger.warning("No SAMMY output files found")
+                if result.success:
+                    logger.error("SAMMY reported success but produced no output files")
+                return
+
+            # Move all found outputs
+            for output_file in found_outputs:
+                dest = self.config.output_dir / output_file.name
+                try:
+                    if dest.exists():
+                        logger.debug(f"Removing existing output file: {dest}")
+                        dest.unlink()
+
+                    output_file.rename(dest)
+                    self._moved_files.append(dest)
+                    logger.debug(f"Moved {output_file} to {dest}")
+
+                except OSError as e:
+                    # Rollback is done once, by the outer handler that catches this error
+                    raise OutputCollectionError(
+                        f"Failed to move output file {output_file}: {str(e)}"
+                    ) from e
+
+            logger.info(
+                f"Successfully collected {len(self._moved_files)} output files in "
+                f"{(datetime.now() - collection_start).total_seconds():.2f} seconds"
+            )
+
+        except Exception as e:
+            self._rollback_moves()
+            raise OutputCollectionError(f"Output collection failed: {str(e)}") from e
+
+    def _rollback_moves(self) -> None:
+        """Rollback any moved files in case of error."""
+        for moved_file in self._moved_files:
+            try:
+                original = self.config.working_dir / moved_file.name
+                moved_file.rename(original)
+            except Exception as e:
+                logger.error(f"Failed to rollback move for {moved_file}: {str(e)}")
+
+    def cleanup(self) -> None:
+        """Clean up after execution."""
+        logger.debug("Performing cleanup for local backend")
+        self._moved_files = []
+
+    def validate_config(self) -> bool:
+        """Validate the configuration."""
+        return self.config.validate()
\ No newline at end of file
diff --git a/src/pleiades/sammy/config.py b/src/pleiades/sammy/config.py
index e69de29..ed59575 100644
--- a/src/pleiades/sammy/config.py
+++ b/src/pleiades/sammy/config.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python
+"""
+Configuration management for SAMMY execution backends.
+
+This module provides concrete configuration classes for each SAMMY backend type,
+inheriting from the base configuration defined in the interface module.
+"""
+
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Dict, Optional
+from urllib.parse import urlparse
+import shutil
+
+from pleiades.sammy.interface import BaseSammyConfig, ConfigurationError
+
+@dataclass
+class LocalSammyConfig(BaseSammyConfig):
+    """Configuration for local SAMMY installation."""
+    sammy_executable: Path
+    shell_path: Path = Path("/bin/bash")
+    env_vars: Dict[str, str] = field(default_factory=dict)
+
+    def validate(self) -> bool:
+        """Validate local SAMMY configuration."""
+        # Validate base configuration first
+        super().validate()
+
+        # Validate SAMMY executable exists and is executable
+        sammy_path = shutil.which(str(self.sammy_executable))
+        if not sammy_path:
+            raise ConfigurationError(f"SAMMY executable not found: {self.sammy_executable}")
+        self.sammy_executable = Path(sammy_path)
+
+        # Validate shell exists
+        if not self.shell_path.exists():
+            raise ConfigurationError(f"Shell not found: {self.shell_path}")
+
+        return True
+
+@dataclass
+class DockerSammyConfig(BaseSammyConfig):
+    """Configuration for Docker-based SAMMY execution."""
+    image_name: str
+    container_working_dir: Path = Path("/sammy")
+    volume_mappings: Dict[Path, Path] = field(default_factory=dict)
+    network: Optional[str] = None
+
+    def validate(self) -> bool:
+        """Validate Docker SAMMY configuration."""
+        # Validate base configuration first
+        super().validate()
+
+        # Validate image name format
+        if not self.image_name:
+            raise ConfigurationError("Docker image name cannot be empty")
+
+        # Validate container working directory is absolute
+        if not self.container_working_dir.is_absolute():
+            raise ConfigurationError("Container working directory must be absolute path")
+
+        # Validate volume mappings exist on host
+        for host_path in self.volume_mappings:
+            if not host_path.exists():
+                raise ConfigurationError(f"Host path does not exist: {host_path}")
+
+        return True
+
+@dataclass
+class NovaSammyConfig(BaseSammyConfig):
+    """Configuration for NOVA web service SAMMY execution."""
+    url: str
+    api_key: str
+    tool_id: str = "neutrons_imaging_sammy"
+    timeout: int = 3600  # Default 1 hour timeout
+    verify_ssl: bool = True
+
+    def validate(self) -> bool:
+        """Validate NOVA SAMMY configuration."""
+        # First validate base configuration
+        super().validate()
+
+        # Validate URL format; only the parse call is guarded so our own error is not re-wrapped
+        try:
+            parsed_url = urlparse(self.url)
+        except Exception as e:
+            raise ConfigurationError(f"URL validation failed: {str(e)}") from e
+        if not all([parsed_url.scheme, parsed_url.netloc]):
+            raise ConfigurationError(f"Invalid URL format: {self.url}")
+
+        # Validate API key format
+        if not self.api_key or len(self.api_key) < 32:
+            raise ConfigurationError("Invalid API key format")
+
+        # Validate timeout
+        if self.timeout <= 0:
+            raise ConfigurationError(f"Invalid timeout value: {self.timeout}")
+
+        return True
\ No newline at end of file
diff --git a/src/pleiades/sammy/interface.py b/src/pleiades/sammy/interface.py
index 0aa7fad..25487b2 100644
--- a/src/pleiades/sammy/interface.py
+++ b/src/pleiades/sammy/interface.py
@@ -1,11 +1,17 @@
-#!/usr/env/bin python
-"""Interface for SAMMY execution backends."""
+#!/usr/bin/env python
+"""
+Interface definitions for SAMMY execution system.
+
+This module defines the core interfaces and data structures used across
+all SAMMY backend implementations.
+"""
+
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from enum import Enum, auto
-from pathlib import Path
-from typing import Optional, Dict, Union, List, Protocol, AsyncContextManager
 from datetime import datetime
+from pathlib import Path
+from typing import Optional, Dict, List, Protocol
 import logging
 
 logger = logging.getLogger(__name__)
@@ -18,20 +24,28 @@ class SammyBackendType(Enum):
 
 @dataclass
 class SammyFiles:
-    """Container for SAMMY input/output files."""
+    """Container for SAMMY input files."""
     input_file: Path
     parameter_file: Path
     data_file: Path
-    working_dir: Path
 
-    def validate_input_files(self) -> None:
-        """Validate that all required input files exist."""
-        for field_name in ['input_file', 'parameter_file', 'data_file']:
-            file_path = getattr(self, field_name)
+    def validate(self) -> None:
+        """
+        Validate that all required input files exist.
+
+        Raises:
+            FileNotFoundError: If any required file is missing
+        """
+        for field_name, file_path in self.__dict__.items():
             if not file_path.exists():
                 raise FileNotFoundError(
                     f"{field_name.replace('_', ' ').title()} not found: {file_path}"
                 )
+            if not file_path.is_file():
+                raise FileNotFoundError(
+                    f"{field_name.replace('_', ' ').title()} is not a file: {file_path}"
+                )
+
 
 @dataclass
 class SammyExecutionResult:
@@ -48,33 +62,40 @@ def runtime_seconds(self) -> float:
         """Calculate execution time in seconds."""
         return (self.end_time - self.start_time).total_seconds()
 
-class BackendConfig(Protocol):
-    """Protocol defining required configuration interface."""
+@dataclass
+class BaseSammyConfig(ABC):
+    """Base configuration for all SAMMY backends."""
+    working_dir: Path  # Directory for SAMMY execution
+    output_dir: Path  # Directory for SAMMY outputs
+
     def validate(self) -> bool:
-        """Validate configuration."""
-        ...
-
-class LocalBackendConfig(BackendConfig):
-    """Configuration for local SAMMY installation."""
-    sammy_executable: Path
-    shell_path: Path = Path("/bin/bash")
-
-class DockerBackendConfig(BackendConfig):
-    """Configuration for Docker backend."""
-    image_name: str
-    container_working_dir: Path
-    volume_mappings: Dict[Path, Path]  # host_path -> container_path
-
-class NovaBackendConfig(BackendConfig):
-    """Configuration for NOVA web service."""
-    url: str
-    api_key: str
-    tool_id: str = "neutrons_imaging_sammy"
+        """
+        Validate the configuration.
+
+        Returns:
+            bool: True if configuration is valid
+
+        Raises:
+            ConfigurationError: If configuration is invalid
+        """
+        import os  # function-scope import: the module's import block does not bring in os
+        # Validate working directory exists and is writable
+        if not self.working_dir.exists():
+            raise ConfigurationError(f"Working directory does not exist: {self.working_dir}")
+        if not os.access(self.working_dir, os.W_OK):
+            raise ConfigurationError(f"Working directory not writable: {self.working_dir}")
+
+        # Ensure output directory exists and is writable
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+        if not os.access(self.output_dir, os.W_OK):
+            raise ConfigurationError(f"Output directory not writable: {self.output_dir}")
+
+        return True
 
 class SammyRunner(ABC):
     """Abstract base class for SAMMY execution backends."""
 
-    def __init__(self, config: BackendConfig):
+    def __init__(self, config: BaseSammyConfig):
         self.config = config
         self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
 
@@ -83,10 +104,11 @@ async def prepare_environment(self, files: SammyFiles) -> None:
         """
         Prepare the execution environment.
 
-        This includes:
-        - Validating input files
-        - Setting up working directory
-        - Preparing files for execution
+        Args:
+            files: Container with file information
+
+        Raises:
+            EnvironmentPreparationError: If preparation fails
         """
         raise NotImplementedError
 
@@ -125,10 +147,11 @@ async def cleanup(self, files: SammyFiles) -> None:
         """
         Clean up resources after execution.
 
-        This includes:
-        - Removing temporary files
-        - Cleaning up containers
-        - Closing connections
+        Args:
+            files: Container with file information
+
+        Raises:
+            CleanupError: If cleanup fails
         """
         raise NotImplementedError
 
@@ -138,14 +161,23 @@ async def __aenter__(self) -> 'SammyRunner':
 
     async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
         """Ensure cleanup on context exit."""
-        await self.cleanup()
+        if hasattr(self, 'files'):
+            await self.cleanup(self.files)
 
     @abstractmethod
     def validate_config(self) -> bool:
-        """Validate backend configuration."""
+        """
+        Validate backend configuration.
+
+        Returns:
+            bool: True if configuration is valid
+
+        Raises:
+            ConfigurationError: If configuration is invalid
+        """
         raise NotImplementedError
 
-# Custom exceptions for better error handling
+# Custom exceptions
 class SammyError(Exception):
     """Base exception for SAMMY-related errors."""
     pass
@@ -165,3 +197,7 @@ class OutputCollectionError(SammyError):
 class ConfigurationError(SammyError):
     """Raised when configuration is invalid."""
     pass
+
+class CleanupError(SammyError):
+    """Raised when cleanup fails."""
+    pass
\ No newline at end of file