-
Notifications
You must be signed in to change notification settings - Fork 121
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Update get_relation_without_caching
to use show with wildcard search in place of describe extended
#501
Update get_relation_without_caching
to use show with wildcard search in place of describe extended
#501
Changes from 3 commits
ded2932
6d8a98b
73df3e4
da38c58
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -234,8 +234,7 @@ def get_relations_without_caching(self, relation: DatabricksRelation) -> Table: | |
kwargs = {"relation": relation} | ||
|
||
new_rows: List[Tuple[Optional[str], str, str, str]] | ||
if relation.database is not None: | ||
assert relation.schema is not None | ||
if all([relation.database, relation.schema]): | ||
tables = self.connections.list_tables( | ||
database=relation.database, schema=relation.schema | ||
) | ||
|
@@ -256,47 +255,63 @@ def get_relations_without_caching(self, relation: DatabricksRelation) -> Table: | |
|
||
# if there are any table types to be resolved | ||
if any(not row[3] for row in new_rows): | ||
# Get view names and create a dictionay of view name to materialization | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. typo |
||
# Get view names and tables and create a dictionary of name to materialization | ||
relation_all_tables = self.Relation.create( | ||
benc-db marked this conversation as resolved.
Show resolved
Hide resolved
|
||
database=relation.database, schema=relation.schema, identifier="*" | ||
) | ||
with self._catalog(relation.database): | ||
views = self.execute_macro(SHOW_VIEWS_MACRO_NAME, kwargs=kwargs) | ||
tables = self.execute_macro( | ||
SHOW_TABLE_EXTENDED_MACRO_NAME, kwargs={"relation": relation_all_tables} | ||
) | ||
|
||
view_names: Dict[str, bool] = { | ||
view["viewName"]: view.get("isMaterialized", False) for view in views | ||
} | ||
|
||
def parse_type(information: str) -> str: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The information in this format is a |
||
type_entry = [ | ||
entry.strip() | ||
for entry in information.split("\n") | ||
if entry.split(":")[0] == "Type" | ||
] | ||
return type_entry[0] if type_entry else "" | ||
|
||
table_names: Dict[str, bool] = { | ||
table["tableName"]: (parse_type(table["information"]) == "STREAMING_TABLE") | ||
for table in tables | ||
} | ||
|
||
# a function to resolve an unknown table type | ||
def typeFromNames( | ||
database: Optional[str], schema: str, name: str | ||
) -> DatabricksRelationType: | ||
if name in view_names: | ||
def typeFromNames(database: Optional[str], name: str) -> DatabricksRelationType: | ||
if is_hive_metastore(database): | ||
mikealfare marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return DatabricksRelationType.Table | ||
elif name in view_names: | ||
# it is either a view or a materialized view | ||
return ( | ||
DatabricksRelationType.MaterializedView | ||
if view_names[name] | ||
else DatabricksRelationType.View | ||
) | ||
elif is_hive_metastore(database): | ||
return DatabricksRelationType.Table | ||
elif name in table_names: | ||
# it is either a table or a streaming table | ||
return ( | ||
DatabricksRelationType.StreamingTable | ||
if table_names[name] | ||
else DatabricksRelationType.Table | ||
) | ||
else: | ||
# not a view so it might be a streaming table | ||
# get extended information to determine | ||
rel = self.Relation.create(database, schema, name) | ||
rel = self._set_relation_information(rel) | ||
if ( | ||
rel.metadata is not None | ||
and rel.metadata.get("Type", "table") == "STREAMING_TABLE" | ||
): | ||
return DatabricksRelationType.StreamingTable | ||
else: | ||
return DatabricksRelationType.Table | ||
raise dbt.exceptions.DbtRuntimeError( | ||
f"Unexpected relation type discovered: Database:{database}, Relation:{name}" | ||
) | ||
|
||
# create a new collection of rows with the correct table types | ||
new_rows = [ | ||
( | ||
row[0], | ||
row[1], | ||
row[2], | ||
str(row[3] if row[3] else typeFromNames(row[0], row[1], row[2])), | ||
str(row[3] if row[3] else typeFromNames(row[0], row[2])), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
) | ||
for row in new_rows | ||
] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
databricks-sql-connector>=2.9.3, <3.0.0 | ||
dbt-spark==1.7.1 | ||
databricks-sdk==0.9.0 | ||
databricks-sdk>=0.9.0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This file did not align with |
||
keyring>=23.13.0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
assert
does not do anything in production.pytest
flips a debug switch when running to make the assert active, but you would never want that in production. So this actually doesn't do anything, which can be misleading as it suggests we're testing when we're not.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My understanding is that the point of this is to detect if the assertion is violated when debugging/testing. It doesn't actually do nothing, it just doesn't execute in the deployed code.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
basically, it is there to verify what we believe to be an invariant. If it is actually invariant, then we don't need the if clause below. If clause is definitely safer though.