Skip to content

Commit

Permalink
Fix setup script generation errors
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-bdufour committed May 11, 2024
1 parent 63884b4 commit a8349a2
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 140 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,15 @@ class ProcessorMapping(UpdatableModel):
class PathMapping(UpdatableModel):
src: str
dest: Optional[str] = None
processors: Optional[List[Union[str, ProcessorMapping]]] = None
processors: Optional[List[Union[str, ProcessorMapping]]] = []

@field_validator("processors")
@classmethod
def transform_processors(
cls, input_values: Optional[List[Union[str, Dict, ProcessorMapping]]]
):
if input_values is None:
return None
return []

transformed_processors: List[ProcessorMapping] = []
for input_processor in input_values:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,4 +114,5 @@ except Exception as exc: # Catch any error
print("An exception occurred while executing file: ", exc, file=sys.stderr)
sys.exit(1)

print(__snowflake_cli_native_app_internal_callback_return_list)
import json
print(json.dumps(__snowflake_cli_native_app_internal_callback_return_list))
Original file line number Diff line number Diff line change
Expand Up @@ -131,5 +131,5 @@ def _enrich_entity(
handler=entity["handler"],
)
entity["all_imports"] = _get_all_imports(
raw_imports=entity["raw_imports"], suffix_str=suffix_str
raw_imports=entity["raw_imports"] or [], suffix_str=suffix_str
)
190 changes: 54 additions & 136 deletions src/snowflake/cli/plugins/nativeapp/codegen/snowpark/python_processor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import pprint
from pathlib import Path
from typing import Any, Dict, List, Optional

Expand All @@ -9,10 +10,7 @@
ProcessorMapping,
)
from snowflake.cli.api.utils.rendering import jinja_render_from_file
from snowflake.cli.plugins.nativeapp.artifacts import (
is_glob,
resolve_without_follow,
)
from snowflake.cli.plugins.nativeapp.artifacts import BundleMap
from snowflake.cli.plugins.nativeapp.codegen.artifact_processor import ArtifactProcessor
from snowflake.cli.plugins.nativeapp.codegen.sandbox import (
ExecutionEnvironmentType,
Expand All @@ -22,10 +20,6 @@
from snowflake.cli.plugins.nativeapp.codegen.snowpark.extension_function_utils import (
_enrich_entity,
)
from snowflake.cli.plugins.nativeapp.utils import (
filter_files,
get_all_file_paths_under_dir,
)

DEFAULT_TIMEOUT = 30

Expand All @@ -37,7 +31,9 @@ def is_python_file(file_path: Path):
return file_path.suffix == ".py"


def _determine_virtual_env(processor: ProcessorMapping) -> Dict[str, Any]:
def _determine_virtual_env(
project_root: Path, processor: ProcessorMapping
) -> Dict[str, Any]:
"""
Determines a virtual environment to run the Snowpark processor in, either through the project definition or by querying the current environment.
"""
Expand All @@ -58,11 +54,14 @@ def _determine_virtual_env(processor: ProcessorMapping) -> Dict[str, Any]:
)
return {"env_type": ExecutionEnvironmentType.CONDA, "name": env_name}
elif env_type.upper() == ExecutionEnvironmentType.VENV.name:
env_path = env_props.get("path", None)
if env_path is None:
env_path_str = env_props.get("path", None)
if env_path_str is None:
cc.warning(
"No path found in project definition file for the conda environment to run the Snowpark processor in. Will attempt to auto-detect the current venv path."
)
env_path = Path(env_path_str)
if not env_path.is_absolute():
env_path = project_root / env_path
return {
"env_type": ExecutionEnvironmentType.VENV,
"path": env_path,
Expand All @@ -74,12 +73,15 @@ def _determine_virtual_env(processor: ProcessorMapping) -> Dict[str, Any]:
return {}


# Path to the Jinja template for the sandbox callback script, anchored to the
# directory containing this module so it does not depend on the current working directory.
TEMPLATE_PATH = Path(__file__).parent / "callback_source.py.jinja"


def _execute_in_sandbox(
py_file: str, deploy_root: Path, kwargs: Dict[str, Any]
) -> Optional[List[Dict[str, Any]]]:
# Create the code snippet to be executed in the sandbox
script_source = jinja_render_from_file(
template_path=Path("./callback_source.py.jinja"), data={"py_file": py_file}
template_path=TEMPLATE_PATH, data={"py_file": py_file}
)

try:
Expand All @@ -89,6 +91,8 @@ def _execute_in_sandbox(
timeout=DEFAULT_TIMEOUT,
**kwargs,
)
cc.message("stdout:", completed_process.stdout)
cc.message("stderr:", completed_process.stderr)
except SandboxExecutionError as sdbx_err:
cc.warning(
f"Could not fetch Snowpark objects from {py_file} due to {sdbx_err}, continuing execution for the rest of the python files."
Expand Down Expand Up @@ -157,145 +161,59 @@ def process(
self,
artifact_to_process: PathMapping,
processor_mapping: Optional[ProcessorMapping],
**kwargs,
) -> Dict[Path, Optional[Any]]:
"""
Intended to be the main method which can perform all relevant processing, and/or write to a target file, which depends on the type of processor.
For SnowparkAnnotationProcessor, the target file is the setup script.
"""

kwargs = (
_determine_virtual_env(processor_mapping)
_determine_virtual_env(self.project_root, processor_mapping)
if processor_mapping is not None
else {}
)

# 1. Get all src.py -> dest.py mapping
# TODO: Logic to be replaced in a follow-up PR by NADE
src_py_file_to_dest_py_file_map = self.get_src_py_file_to_dest_py_file_map(
artifact_to_process
)

# 2. Get entities through Snowpark callback
src_py_file_to_collected_entities: Dict[Path, Optional[Any]] = {}
for src_file, dest_file in src_py_file_to_dest_py_file_map.items():
if dest_file.suffix == ".py":
try:
collected_entities = _execute_in_sandbox(
py_file=str(dest_file.resolve()),
deploy_root=self.deploy_root,
kwargs=kwargs,
)
except Exception as exc:
cc.warning(
f"Error processing extension functions in {src_file}: {exc}"
) # Display the actual file for the user to inspect
cc.warning(
"Skipping generating code of all objects from this file."
)
collected_entities = None

src_py_file_to_collected_entities[dest_file] = collected_entities

if collected_entities is None:
cc.message("No entities could be collected from the file path.")
continue

cc.message(f"This is the file path in deploy root: {dest_file}\n")
cc.message("This is the list of collected entities:")
cc.message(collected_entities)

# 4. Enrich entities by setting additional properties
for entity in collected_entities:
_enrich_entity(
entity=entity,
py_file=dest_file,
deploy_root=self.deploy_root,
suffix_str=".py",
)

# TODO: Temporary for testing, while feature is being built in phases
return src_py_file_to_collected_entities

def get_src_py_file_to_dest_py_file_map(
self,
artifact_to_process: PathMapping,
) -> Dict[Path, Path]:
"""
For the project definition for a native app, find the mapping between src python files and their destination python files.
"""

src_py_file_to_dest_py_file_map: Dict[Path, Path] = {}
artifact_src = artifact_to_process.src

resolved_root = self.deploy_root.resolve()
dest_path = resolve_without_follow(
Path(resolved_root, artifact_to_process.dest)
bundle_map = BundleMap(
project_root=self.project_root, deploy_root=self.deploy_root
)

# Case 1: When artifact has the following src/dest pairing
# src: john/doe/folder1/*.py OR john/doe/folder1/**/*.py
# dest: stagepath/
# OR
# Case 2: When artifact has the following src/dest pairing
# src: john/doe/folder1/**/* (in this case, all files and directories under src need to be considered)
# dest: stagepath/
if (is_glob(artifact_src) and artifact_src.endswith(".py")) or (
"**" in artifact_src
bundle_map.add_path_mapping(artifact_to_process)
for src_file, dest_file in bundle_map.all_deployed_files(
predicate=is_python_file, absolute=True
):
src_files_gen = self.project_root.glob(artifact_src)
src_py_files_gen = filter_files(
generator=src_files_gen, predicate_func=is_python_file
)
for py_file in src_py_files_gen:
_add_py_file_dest_to_dict(
dest_path=dest_path,
py_file=py_file,
src_py_file_to_dest_py_file_map=src_py_file_to_dest_py_file_map,
try:
collected_entities = _execute_in_sandbox(
py_file=str(dest_file),
deploy_root=self.deploy_root,
kwargs=kwargs,
)
except Exception as exc:
cc.warning(
f"Error processing extension functions in {src_file}: {exc}"
) # Display the actual file for the user to inspect
cc.warning("Skipping generating code of all objects from this file.")
collected_entities = None

src_py_file_to_collected_entities[dest_file] = collected_entities

if collected_entities is None:
cc.message("No entities could be collected from the file path.")
continue

cc.message(f"This is the file path in deploy root: {dest_file}\n")
cc.message("This is the list of collected entities:")
cc.message(pprint.pformat(collected_entities))

# 4. Enrich entities by setting additional properties
for entity in collected_entities:
_enrich_entity(
entity=entity,
py_file=dest_file,
deploy_root=self.deploy_root,
suffix_str=".py",
)

# Case 3: When artifact has the following src/dest pairing
# src: john/doe/folder1/*
# dest: stagepath/ (in this case, the directories under folder1 will be symlinked, which means files inside those directories also need to be considered due to implicit availability from directory symlink)
elif is_glob(artifact_src):
src_files_and_dirs = self.project_root.glob(artifact_src)
for path in src_files_and_dirs:
if path.is_dir():
file_gen = get_all_file_paths_under_dir(path)
py_file_gen = filter_files(
generator=file_gen, predicate_func=is_python_file
)
for py_file in py_file_gen:
_add_py_file_dest_to_dict(
dest_path=dest_path,
py_file=py_file,
src_py_file_to_dest_py_file_map=src_py_file_to_dest_py_file_map,
deploy_root=self.deploy_root,
)
elif path.is_file() and path.suffix == ".py":
_add_py_file_dest_to_dict(
dest_path=dest_path,
py_file=path,
src_py_file_to_dest_py_file_map=src_py_file_to_dest_py_file_map,
deploy_root=self.deploy_root,
)

# TODO: Unify Case 2 and Case 3 once symlinking "bugfix" is in.

# Case 4: When artifact has the following src/dest pairing
# src: john/doe/folder1/main.py
# dest: stagepath/stagemain.py
elif artifact_src.endswith(".py") and artifact_to_process.dest.endswith(".py"):
if dest_path.exists():
src_py_file_to_dest_py_file_map[
Path(self.project_root, artifact_src)
] = dest_path
else:
cc.warning(f"{dest_path} does not exist in {self.deploy_root}.")

# Case 5: When artifact has the following src/dest pairing
# src: john/doe/folder1.py.zip
# dest: stagepath/stagefolder1.py.zip
# TODO: Does this case 5 need to be considered?

return src_py_file_to_dest_py_file_map
# TODO: Temporary for testing, while feature is being built in phases
return src_py_file_to_collected_entities

0 comments on commit a8349a2

Please sign in to comment.