Skip to content

Commit

Permalink
Define event_monitoring_live_v1 views in view.sql files (#4576)
Browse files Browse the repository at this point in the history
* Define `event_monitoring_live_v1` views in `view.sql` files.

This ensures the views are automatically deployed by the `bqetl_artifact_deployment.publish_views` Airflow task.

* Support materialized views in view naming validation.

* Handle `IF NOT EXISTS` in view naming validation.

* Use regular expression to extract view ID in view naming validation.

This simplifies the logic and works around a sqlparse bug in which sqlparse doesn't recognize the `MATERIALIZED` keyword.

* Update other view regular expressions to allow for materialized views.
  • Loading branch information
sean-rose authored Dec 8, 2023
1 parent ff6f08a commit 2c4cc5e
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 33 deletions.
27 changes: 11 additions & 16 deletions bigquery_etl/view/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@

# Regex matching CREATE VIEW statement so it can be removed to get the view query
CREATE_VIEW_PATTERN = re.compile(
r"CREATE\s+OR\s+REPLACE\s+VIEW\s+[^\s]+\s+AS", re.IGNORECASE
r"CREATE(?:\s+OR\s+REPLACE)?(?:\s+MATERIALIZED)?\s+VIEW(?:\s+IF\s+NOT\s+EXISTS)?\s+[^\s]+\s+AS",
re.IGNORECASE,
)


Expand Down Expand Up @@ -184,22 +185,16 @@ def _valid_fully_qualified_references(self):

def _valid_view_naming(self):
"""Validate that the created view naming matches the directory structure."""
parsed = sqlparse.parse(self.content)[0]
tokens = [
t
for t in parsed.tokens
if not (t.is_whitespace or isinstance(t, sqlparse.sql.Comment))
]
is_view_statement = (
" ".join(tokens[0].normalized.split()) == "CREATE OR REPLACE"
and tokens[1].normalized == "VIEW"
)
if is_view_statement:
target_view = str(tokens[2]).strip().split()[0]
sql = sqlparse.format(self.content, strip_comments=True).strip()
if view_statement_match := re.match(
r"CREATE(?:\s+OR\s+REPLACE)?(?:\s+MATERIALIZED)?\s+VIEW(?:\s+IF\s+NOT\s+EXISTS)?"
r"\s+(?P<view_id>(?:(?:`?[\w-]+`?\.)?`?\w+`?\.)?`?\w+`?)",
sql,
re.IGNORECASE,
):
target_view = view_statement_match["view_id"].replace("`", "")
try:
[project_id, dataset_id, view_id] = target_view.replace("`", "").split(
"."
)
[project_id, dataset_id, view_id] = target_view.split(".")
if not (
self.name == view_id
and self.dataset == dataset_id
Expand Down
2 changes: 1 addition & 1 deletion bqetl_project.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ dry_run:
- sql/moz-fx-data-shared-prod/org_mozilla_firefox_beta_derived/experiment_events_live_v1/init.sql
- sql/moz-fx-data-shared-prod/telemetry_derived/experiment_enrollment_cumulative_population_estimate_v1/view.sql
- sql/moz-fx-data-shared-prod/telemetry/experiment_enrollment_cumulative_population_estimate/view.sql
- sql/moz-fx-data-shared-prod/**/event_monitoring_live_v1/init.sql
- sql/moz-fx-data-shared-prod/**/event_monitoring_live_v1/view.sql
- sql/moz-fx-data-shared-prod/monitoring/event_monitoring_live/view.sql
# Already exists (and lacks an "OR REPLACE" clause)
- sql/moz-fx-data-shared-prod/org_mozilla_firefox_derived/clients_first_seen_v1/init.sql
Expand Down
29 changes: 14 additions & 15 deletions sql_generators/glean_usage/event_monitoring_live.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class EventMonitoringLive(GleanTable):

def __init__(self) -> None:
"""Initialize materialized view generation."""
self.no_init = False
self.no_init = True
self.per_app_id_enabled = True
self.per_app_enabled = False
self.across_apps_enabled = True
Expand All @@ -38,9 +38,10 @@ def __init__(self) -> None:
def generate_per_app_id(
self, project_id, baseline_table, output_dir=None, use_cloud_function=True, app_info=[]
):
"""Generate per-app_id views."""
tables = table_names_from_baseline(baseline_table, include_project_id=False)

init_filename = f"{self.target_table_id}.init.sql"
view_filename = f"{self.target_table_id}.view.sql"
metadata_filename = f"{self.target_table_id}.metadata.yaml"

table = tables[f"{self.prefix}"]
Expand Down Expand Up @@ -68,23 +69,21 @@ def generate_per_app_id(
Artifact = namedtuple("Artifact", "table_id basename sql")
artifacts = []

if not self.no_init:
init_sql = render(
init_filename, template_folder=PATH / "templates", **render_kwargs
)
metadata = render(
metadata_filename,
template_folder=PATH / "templates",
format=False,
**render_kwargs,
)
artifacts.append(Artifact(table, "metadata.yaml", metadata))
view_sql = render(
view_filename, template_folder=PATH / "templates", **render_kwargs
)
metadata = render(
metadata_filename,
template_folder=PATH / "templates",
format=False,
**render_kwargs,
)
artifacts.append(Artifact(table, "metadata.yaml", metadata))

skip_existing_artifact = self.skip_existing(output_dir, project_id)

if output_dir:
if not self.no_init:
artifacts.append(Artifact(table, "init.sql", init_sql))
artifacts.append(Artifact(table, "view.sql", view_sql))

for artifact in artifacts:
destination = (
Expand Down
3 changes: 2 additions & 1 deletion sql_generators/stable_views/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,8 @@ def write_view_if_not_exists(target_project: str, sql_dir: Path, schema: SchemaF
from sql_generators.stable_views import VIEW_METADATA_TEMPLATE, VIEW_QUERY_TEMPLATE

VIEW_CREATE_REGEX = re.compile(
r"CREATE OR REPLACE VIEW\n\s*[^\s]+\s*\nAS", re.IGNORECASE
r"CREATE(?:\s+OR\s+REPLACE)?(?:\s+MATERIALIZED)?\s+VIEW(?:\s+IF\s+NOT\s+EXISTS)?\s+[^\s]+\s+AS",
re.IGNORECASE,
)

SKIP_VIEW_SCHEMA = {
Expand Down

0 comments on commit 2c4cc5e

Please sign in to comment.