Skip to content

Commit

Permalink
added noop Spark testing
Browse files Browse the repository at this point in the history
  • Loading branch information
blublinsky committed Oct 13, 2024
1 parent 6e2863a commit 3c9be57
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@
################################################################################


from data_processing.test_support.transform import NOOPTransformConfiguration
from data_processing.test_support.transform import NOOPFolderTransform
from data_processing.test_support.transform import NOOPFolderTransform, NOOPTransformConfiguration
from data_processing.utils import get_logger
from data_processing_ray.runtime.ray import (
RayTransformLauncher,
Expand All @@ -25,7 +24,7 @@
logger = get_logger(__name__)


class NOOPFolderPythonRuntime(DefaultRayTransformRuntime):
class NOOPFolderRayRuntime(DefaultRayTransformRuntime):
def get_folders(self, data_access: DataAccess) -> list[str]:
"""
Get folders to process
Expand All @@ -47,7 +46,7 @@ def __init__(self):
Initialization
"""
super().__init__(transform_config=NOOPTransformConfiguration(clazz=NOOPFolderTransform),
runtime_class=NOOPFolderPythonRuntime)
runtime_class=NOOPFolderRayRuntime)


if __name__ == "__main__":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@
################################################################################

from data_processing_spark.test_support.transform.noop_transform import NOOPSparkTransformConfiguration
from data_processing_spark.test_support.transform.noop_folder_transform import NOOPFolderSparkTransformConfiguration
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

from data_processing.test_support.transform import NOOPFolderTransform, NOOPTransformConfiguration
from data_processing.utils import get_logger
from data_processing_spark.runtime.spark import SparkTransformLauncher
from data_processing_spark.runtime.spark import SparkTransformRuntimeConfiguration, DefaultSparkTransformRuntime
from data_processing.data_access import DataAccess


logger = get_logger(__name__)


class NOOPFolderSparkRuntime(DefaultSparkTransformRuntime):
    """
    Spark transform runtime for the folder-based NOOP transform.
    It overrides get_folders so that the only folder reported is the
    input folder of the supplied data access.
    """

    def get_folders(self, data_access: DataAccess) -> list[str]:
        """
        Build the list of folders to process
        :param data_access: data access used to look up the input folder
        :return: single-element list holding the data access input folder
        """
        input_folder = data_access.get_input_folder()
        return [input_folder]


class NOOPFolderSparkTransformConfiguration(SparkTransformRuntimeConfiguration):
    """
    Implements the SparkTransformRuntimeConfiguration for the folder-based NOOP transform,
    for use with the SparkTransformLauncher.
    It combines the NOOP transform configuration (with NOOPFolderTransform as the
    transform class) with NOOPFolderSparkRuntime as the runtime class.
    """

    def __init__(self):
        """
        Initialization
        """
        super().__init__(
            transform_config=NOOPTransformConfiguration(clazz=NOOPFolderTransform),
            runtime_class=NOOPFolderSparkRuntime,
        )


if __name__ == "__main__":
    # create the Spark launcher for the folder-based NOOP configuration
    launcher = SparkTransformLauncher(runtime_config=NOOPFolderSparkTransformConfiguration())
    logger.info("Launching noop transform")
    # Launch the transform through the Spark launcher to process the input
    launcher.launch()
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

import os

from data_processing.test_support.launch.transform_test import (
AbstractTransformLauncherTest,
)
from data_processing_spark.runtime.spark import SparkTransformLauncher
from data_processing_spark.test_support.transform import NOOPFolderSparkTransformConfiguration


class TestSparkNOOPTransform(AbstractTransformLauncherTest):
    """
    Provides the test data for the tests that are defined in the super-class.
    The class name MUST start with the word Test, otherwise pytest does not
    recognize it as a test class.
    """

    def get_test_transform_fixtures(self) -> list[tuple]:
        """
        Build the fixtures for the folder-based NOOP Spark transform
        :return: list with a single tuple of (launcher, parameters dictionary,
            path of the "input" directory, path of the "expected" directory),
            where both paths are located under the test-data directory that is
            resolved relative to this file
        """
        this_dir = os.path.dirname(__file__)
        test_data_dir = os.path.abspath(os.path.join(this_dir, "../../../../test-data"))
        launcher = SparkTransformLauncher(NOOPFolderSparkTransformConfiguration())
        params = {"noop_sleep_sec": 1}
        return [(launcher, params, test_data_dir + "/input", test_data_dir + "/expected")]

0 comments on commit 3c9be57

Please sign in to comment.