Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement migration sequencing (phase 3) #3018

Open
wants to merge 38 commits into
base: migration-sequencing-phase-2
Choose a base branch
from
Open
Changes from 1 commit
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
0b96a34
make simple_dependency_resolver available more broadly
ericvergnaud Oct 16, 2024
a34001b
build migration steps for workflow task
ericvergnaud Oct 16, 2024
5272981
fix pylint warnings
ericvergnaud Oct 16, 2024
be30d4c
fix pylint warnings
ericvergnaud Oct 16, 2024
30872fc
add object name
ericvergnaud Oct 16, 2024
b53986a
populate object owner
ericvergnaud Oct 16, 2024
c15e230
be more defensive
ericvergnaud Oct 16, 2024
f2ce384
move last_node_id to sequencer
ericvergnaud Oct 17, 2024
9c5d569
cherry-pick changes
ericvergnaud Oct 17, 2024
27beade
use existing Ownership classes
ericvergnaud Oct 17, 2024
f642ea4
fix merge issues
ericvergnaud Oct 17, 2024
a1ae84a
create steps for source files
ericvergnaud Oct 16, 2024
990223d
fix merge issues
ericvergnaud Oct 17, 2024
56df506
register notebooks from dependency graph
ericvergnaud Oct 17, 2024
840834d
fix merge issues
ericvergnaud Oct 17, 2024
7f30ae6
mock WorkspaceCache for testing
ericvergnaud Oct 17, 2024
6a0f873
populate ownership - leave the correct implementation to issue #3003
ericvergnaud Oct 18, 2024
0d4d2b0
fix incorrect step sequence
ericvergnaud Oct 18, 2024
9603c17
fix incorrect step sequence
ericvergnaud Oct 18, 2024
5b4e0e6
basic support of cyclic dependencies
ericvergnaud Oct 18, 2024
4e2aedc
rename local
ericvergnaud Oct 18, 2024
206cb36
formatting
ericvergnaud Oct 18, 2024
5eacacc
formatting
ericvergnaud Oct 18, 2024
0ad44d5
create step for used table
ericvergnaud Oct 18, 2024
1e7e0e8
move package
ericvergnaud Oct 21, 2024
9ac4295
improve assert style
ericvergnaud Oct 21, 2024
17a33e3
formatting
ericvergnaud Oct 21, 2024
da0330d
make 'incoming' transient and improve comments
ericvergnaud Oct 21, 2024
ae1c697
Merge branch 'migration-sequencing-phase-1' into migration-sequencing…
ericvergnaud Oct 21, 2024
06ce8b6
Merge branch 'migration-sequencing-phase-2' into migration-sequencing…
ericvergnaud Oct 21, 2024
2574b1d
Merge branch 'main' into migration-sequencing-phase-1
ericvergnaud Oct 23, 2024
4abda78
Merge branch 'migration-sequencing-phase-1' into migration-sequencing…
ericvergnaud Oct 23, 2024
1502ad8
use WorkspacePathOwnership
ericvergnaud Oct 23, 2024
5f27788
Merge branch 'migration-sequencing-phase-2' into migration-sequencing…
ericvergnaud Oct 23, 2024
8c666e5
Merge branch 'main' into migration-sequencing-phase-1
ericvergnaud Oct 25, 2024
3c34640
Merge branch 'migration-sequencing-phase-1' into migration-sequencing…
ericvergnaud Oct 25, 2024
65802de
Merge branch 'migration-sequencing-phase-2' into migration-sequencing…
ericvergnaud Oct 25, 2024
56ef893
fix merge issues
ericvergnaud Oct 25, 2024
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Previous commit
Next commit
be more defensive
  • Loading branch information
ericvergnaud committed Oct 17, 2024
commit c15e2305ad7b78dde05d984c79be3285fc468c52
17 changes: 12 additions & 5 deletions src/databricks/labs/ucx/sequencing/sequencing.py
Original file line number Diff line number Diff line change
@@ -4,6 +4,7 @@
from collections.abc import Iterable
from dataclasses import dataclass, field

from databricks.sdk import WorkspaceClient
from databricks.sdk.service import jobs

from databricks.labs.ucx.source_code.graph import DependencyGraph
@@ -66,7 +67,8 @@ def find(self, object_type: str, object_id: str) -> MigrationNode | None:

class MigrationSequencer:

def __init__(self):
def __init__(self, ws: WorkspaceClient):
self._ws = ws
self._root = MigrationNode(
node_id=0, object_type="ROOT", object_id="ROOT", object_name="ROOT", object_owner="NONE"
)
@@ -83,7 +85,7 @@ def register_workflow_task(self, task: jobs.Task, job: jobs.Job, _graph: Depende
object_type="TASK",
object_id=task_id,
object_name=task.task_key,
object_owner=job_node.object_owner, # no task owner so use job one
object_owner=job_node.object_owner, # no task owner so use job one
)
job_node.required_steps.append(task_node)
if task.existing_cluster_id:
@@ -127,14 +129,17 @@ def register_cluster(self, cluster_key: str) -> MigrationNode:
cluster_node = self._find_node(object_type="CLUSTER", object_id=cluster_key)
if cluster_node:
return cluster_node
details = self._ws.clusters.get(cluster_key)
object_name = details.cluster_name if details and details.cluster_name else cluster_key
object_owner = details.creator_user_name if details and details.creator_user_name else "<UNKNOWN>"
MigrationNode.last_node_id += 1
cluster_node = MigrationNode(
node_id=MigrationNode.last_node_id,
object_type="CLUSTER",
object_id=cluster_key,
object_name=cluster_key,
object_owner="NONE",
) # TODO object_owner
object_name=object_name,
object_owner=object_owner,
)
# TODO register warehouses and policies
self._root.required_steps.append(cluster_node)
return cluster_node
@@ -155,6 +160,8 @@ def _deduplicate_steps(steps: Iterable[MigrationStep]) -> Iterable[MigrationStep
for step in steps:
existing = best_steps.get(step.step_id, None)
# keep the step with the highest step number
# TODO this possibly affects the step_number of steps that depend on this one
# but it's probably OK to not be 100% accurate initially
if existing and existing.step_number >= step.step_number:
continue
best_steps[step.step_id] = step
8 changes: 7 additions & 1 deletion tests/unit/sequencing/test_sequencing.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from databricks.sdk.service import jobs
from databricks.sdk.service.compute import ClusterDetails

from databricks.labs.ucx.sequencing.sequencing import MigrationSequencer
from databricks.labs.ucx.source_code.base import CurrentSessionState
@@ -7,16 +8,21 @@


def test_cluster_from_task_has_children(ws, simple_dependency_resolver, mock_path_lookup):
ws.clusters.get.return_value = ClusterDetails(cluster_name="my-cluster", creator_user_name="John Doe")
task = jobs.Task(task_key="test-task", existing_cluster_id="cluster-123")
settings = jobs.JobSettings(name="test-job", tasks=[task])
job = jobs.Job(job_id=1234, settings=settings)
ws.jobs.get.return_value = job
dependency = WorkflowTask(ws, task, job)
graph = DependencyGraph(dependency, None, simple_dependency_resolver, mock_path_lookup, CurrentSessionState())
sequencer = MigrationSequencer()
sequencer = MigrationSequencer(ws)
sequencer.register_workflow_task(task, job, graph)
steps = list(sequencer.generate_steps())
step = steps[-1]
assert step.step_id
assert step.object_type == "CLUSTER"
assert step.object_id == "cluster-123"
assert step.object_name == "my-cluster"
assert step.object_owner == "John Doe"
assert step.step_number == 3
assert len(step.required_step_ids) == 2