Skip to content

Commit

Permalink
Revert "Allow multiple folders in class_package node registration k…
Browse files Browse the repository at this point in the history
…ey (#123)" (#124)

This reverts commit 328076b.
  • Loading branch information
parul-l authored Mar 19, 2024
1 parent 328076b commit 9d2952b
Show file tree
Hide file tree
Showing 8 changed files with 17 additions and 223 deletions.
74 changes: 0 additions & 74 deletions README_METADATA.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ Users are free to add whatever keys and configuration they wish in this section.
In this document, we will cover the following metadata keys:

- traverser
- class_package
- data_object
- section_registry
- section_run
Expand All @@ -20,8 +19,6 @@ For instance, you might have a configuration
"traverser": "DepthFirstTraverser",
"class_package": ["./src", "./sample_project"],
"data_object": {
"read_from_cache": false,
"read_filename": "/tmp/data_object_20190618.dill",
Expand Down Expand Up @@ -73,77 +70,6 @@ The way users defined what to use is with the `traverser` key in `metadata`. If
}
```

# class_package
When creating additional nodes for your projects, you can specify where to look for potential nodes to register, via

(1) setting the `PRIMROSE_EXT_NODE_PACKAGE` environment variable, or
(2) specifying the folders in the `metadata` portion of your primrose config via the `class_package` key.


For example, if all custom nodes are in the `src` folder,
```
├── config
│ ├── my_primrose_config.yml
├── src
│ ├── readers
│ │ ├── reader_node.py
│ ├── pipelines
│ │ ├── pipeline_node.py
```
we can set `PRIMROSE_EXT_NODE_PACKAGE=./src`, or define the primrose config as
```
metadata:
section_registry:
- reader_config
- writer_config
class_package:
- './src'
implementation_config:
reader_config:
...
writer_config:
...
```

The latter is particularly useful when mulitple projects are being built off the same primrose package. For example, given the following folder structure
```
├── config
│ ├── primrose_config1.yml
│ ├── primrose_config2.yml
├── src
│ ├── readers
│ │ ├── reader_node.py
│ ├── pipelines
│ │ ├── pipeline_node.py
├── projects
│ ├── sample_project1
│ │ ├── sample_project1_node.py
│ ├── sample_project2
│ │ ├── sample_project2_node.py
```
we can define the primrose config for `sample_project1` as
```
metadata:
class_package:
- './src'
- './projects/sample_project1'
implementation_config:
...
```
and the primrose config for `sample_project2` as
```
metadata:
class_package:
- './src'
- './projects/sample_project2'
implementation_config:
...
```
This ensures all classes within the `src` and `projects/sample_project{i}` folders are considered when registering nodes specified in the primrose configs.

# section_registry key
The default assumption of
```
Expand Down
26 changes: 10 additions & 16 deletions primrose/configuration/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,42 +455,36 @@ def _import_file(full_name, path):
def _get_file_candidates(self):
"""Get file candidates to search through when specifying a class package.
First consider value in PRIMROSE_EXT_NODE_PACKAGE but give priority to `class_package`
in configuration metadata. That is, if `class_package` is set in the configuration metadata,
it will override the PRIMROSE_EXT_NODE_PACKAGE variable. If nothing is specified, in either
Priority will first consider environment variable PRIMROSE_EXT_NODE_PACKAGE. If unset, will
search the configuration metadata for key `class_package`. If nothing is specified, in either
location, an empty list is returned.
Returns:
list of potential files to search for classes to register
"""
# for now assume packages/top level only
if CLASS_ENV_PACKAGE_KEY in os.environ:
logging.info("Using package from environment variable")
pkg_name = os.environ[CLASS_ENV_PACKAGE_KEY]
if self.config_metadata and "class_package" in self.config_metadata:
# overwrites package set in environment variable CLASS_ENV_PACKAGE_KEY
logging.info("Using package from configuration metadata")
pkg_name = self.config_metadata["class_package"]
elif self.config_metadata:
if "class_package" in self.config_metadata:
pkg_name = self.config_metadata["class_package"]
else:
return []
else:
return []
# look for path to module to find potential file candidates
try:
# if we are passed something like __init__.py, grab the package
if isinstance(pkg_name, str) and os.path.isfile(pkg_name):
if os.path.isfile(pkg_name):
pkg_name = os.path.dirname(pkg_name)
# if we have an actual package from pip install
if isinstance(pkg_name, str) and not os.path.isdir(pkg_name):
if not os.path.isdir(pkg_name):
pkg_name = os.path.dirname(importlib.import_module(pkg_name).__file__)
except ModuleNotFoundError:
logging.warning("Could not find module specified for external node configuration")
return []

if isinstance(pkg_name, str):
pkg_name = [pkg_name]

candidates = []
for pkg in pkg_name:
candidates += glob.glob(os.path.join(pkg, "**", "*.py"), recursive=True)
candidates = glob.glob(os.path.join(pkg_name, "**", "*.py"), recursive=True)

return candidates

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ dependencies = [
"mysql-connector-python>=8.0.32",
"slackclient>=2.9.4",
"testfixtures>=7.1.0",
"moto==4.1.4",
"moto>=4.1.4",
"nltk>=3.8.1",
"pydot>=1.4.2",
]
Expand Down
53 changes: 0 additions & 53 deletions test/hello_world_read_process_write.json

This file was deleted.

49 changes: 0 additions & 49 deletions test/sample_project/change_column_order.py

This file was deleted.

15 changes: 0 additions & 15 deletions test/sample_project/data/tennis_output.csv

This file was deleted.

12 changes: 6 additions & 6 deletions test/test_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -603,19 +603,19 @@ def test_class_package(mock_env):
NodeFactory().unregister("TestExtNode")


def test_class_package_override_env(mock_env):
def test_env_override_class_package(mock_env):
config = {
"metadata": {"class_package": "junk"},
"implementation_config": {
"reader_config": {"read_data": {"class": "TestExtNode", "destinations": []}}
},
}
with pytest.raises(Exception) as e:
Configuration(config_location=None, is_dict_config=True, dict_config=config)
assert (
"Cannot register node class TestExtNode"
in str(e)
config = Configuration(
config_location=None, is_dict_config=True, dict_config=config
)
assert config.config_string
assert config.config_hash
NodeFactory().unregister("TestExtNode")


def test_incorrect_class_package():
Expand Down
9 changes: 0 additions & 9 deletions test/test_dag_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
from abc import abstractmethod
from primrose.base.writer import AbstractWriter

TEST_DIR = os.path.dirname(os.path.abspath(__file__))

def test_run():
config = {
Expand Down Expand Up @@ -767,11 +766,3 @@ def test_run_pruned():
("root", "INFO", "left node!"),
("root", "INFO", "All done. Bye bye!"),
)


def test_class_packages_run():
config_loc = os.path.join(TEST_DIR, "hello_world_read_process_write.json")
configuration = Configuration(config_location=config_loc)

runner = DagRunner(configuration)
runner.run()

0 comments on commit 9d2952b

Please sign in to comment.