From ca2aab98960cd827f965e286648e1d5abadb9b79 Mon Sep 17 00:00:00 2001
From: bill-warner <71764352+bill-warner@users.noreply.github.com>
Date: Fri, 21 May 2021 12:09:00 +0300
Subject: [PATCH 1/2] Bigquery Web: Add integration tests

---
 .scripts/README.md                            | 36 ++++++++++
 .scripts/e2e.sh                               |  1 +
 .scripts/integration_test.sh                  | 54 +++++++++++++++
 .scripts/pr_check.sh                          |  1 +
 .scripts/run_config.sh                        | 15 +++-
 .scripts/run_test.sh                          |  1 +
 .../web/v1/integration_tests.json             | 69 +++++++++++++++++++
 .../events_staged_integration_test_1.json     | 13 ++++
 .../events_staged_integration_test_2.json     | 13 ++++
 .../events_staged_integration_test_3.json     | 13 ++++
 .../events_staged_integration_test_4.json     | 13 ++++
 .../events_staged_integration_test_5.json     | 13 ++++
 .../perm_integration_test_tables.json         | 29 ++++++++
 .../web/v1/bigquery_variables.yml.tmpl        |  8 +++
 14 files changed, 276 insertions(+), 3 deletions(-)
 create mode 100755 .scripts/integration_test.sh
 create mode 100644 .test/great_expectations/expectations/web/v1/integration_tests.json
 create mode 100644 .test/great_expectations/validation_configs/web/v1/bigquery/events_staged_integration_test_1.json
 create mode 100644 .test/great_expectations/validation_configs/web/v1/bigquery/events_staged_integration_test_2.json
 create mode 100644 .test/great_expectations/validation_configs/web/v1/bigquery/events_staged_integration_test_3.json
 create mode 100644 .test/great_expectations/validation_configs/web/v1/bigquery/events_staged_integration_test_4.json
 create mode 100644 .test/great_expectations/validation_configs/web/v1/bigquery/events_staged_integration_test_5.json
 create mode 100644 .test/great_expectations/validation_configs/web/v1/bigquery/perm_integration_test_tables.json
 create mode 100644 .test/integration_tests/web/v1/bigquery_variables.yml.tmpl

diff --git a/.scripts/README.md b/.scripts/README.md
index 27b39472..3639b88e 100644
--- a/.scripts/README.md
+++ b/.scripts/README.md
@@ -53,6 +53,7 @@ Note that this script does not enforce dependencies, rather runs the playbooks i
 -d (dryRun) use sql-runner dry run
 -o (output path) path to store output of sql-runner to sql file (to be used in conjunction with p)
 -t (target template) path to target template to use (minimizes risk of credential leak)
+-v (variable template) path to variable template. Any variables in this template will override any corresponding variables within each playbook for the run.
 ```
 
 **Examples:**
@@ -178,6 +179,41 @@ bash .scripts/pr_check.sh -b ~/pathTo/sql-runner -d bigquery -m web;
 # Runs the pr check testing script against bigquery
 ```
 
+## integration_test.sh
+
+Runs 5 end to end runs of the standard model in 1 day increments, using the integration test dataset. The actual derived tables are then checked against the expected derived tables. The standard tests are also performed on the derived tables.
+
+We recommend using a virtual environment for python, eg. `pyenv` or `virtualenv` - for example using the latter:
+
+```bash
+virtualenv ~/myenv
+source ~/myenv/bin/activate
+```
+
+Before running, make sure to install python requirements (python3 required):
+
+```bash
+cd data-models/.test
+pip3 install -r requirements.txt
+```
+
+**Arguments:**
+
+```
+-b (binary) path to sql-runner binary [required]
+-d (database) target database for expectations [required]
+-a (auth) optional credentials for database target
+-m (model) target model to run i.e. web or mobile [required]
+```
+
+**Examples:**
+
+```bash
+bash .scripts/integration_test.sh -b ~/pathTo/sql-runner -d bigquery -m web
+
+# Runs the integration testing script against bigquery
+```
+
 ### `run_playbooks.sh` (deprecated)
 
 Deprecated - `run_config.sh` provides a simpler instrumentation for this functionality.
diff --git a/.scripts/e2e.sh b/.scripts/e2e.sh
index 6eb1ca56..abc564eb 100755
--- a/.scripts/e2e.sh
+++ b/.scripts/e2e.sh
@@ -4,6 +4,7 @@
 # -b (binary) path to sql-runner binary
 # -d (database) target database for expectations
 # -a (auth) optional credentials for database target
+# -m (model) target model to run i.e. web or mobile
 
 while getopts 'b:d:a:m:' v
 do
diff --git a/.scripts/integration_test.sh b/.scripts/integration_test.sh
new file mode 100755
index 00000000..cc2d6ce4
--- /dev/null
+++ b/.scripts/integration_test.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+# Runs 5 incremental model runs, checking events_staged against its expected
+# state after each one, then checks the derived tables.
+# Expected input:
+# -b (binary) path to sql-runner binary (falls back to sql-runner on PATH)
+# -d (database) target database for expectations
+# -a (auth) optional credentials for database target
+# -m (model) target model to run i.e. web or mobile
+
+while getopts 'b:d:a:m:' opt
+do
+  case $opt in
+    b) SQL_RUNNER_PATH=$OPTARG ;;
+    d) DATABASE=$OPTARG ;;
+    a) CREDENTIALS=$OPTARG ;;
+    m) MODEL=$OPTARG ;;
+  esac
+done
+
+# Fail fast rather than building config/template paths from empty values
+: "${DATABASE:?-d (database) is required}" "${MODEL:?-m (model) is required}"
+
+repo_root_path=$( cd "$(dirname "$(dirname "${BASH_SOURCE[0]}")")" && pwd -P )
+script_path="${repo_root_path}/.scripts"
+config_dir="${repo_root_path}/$MODEL/v1/$DATABASE/sql-runner/configs"
+# Left relative: run_config.sh prepends its own root path to the -v argument
+variables_template=".test/integration_tests/$MODEL/v1/${DATABASE}_variables.yml.tmpl"
+
+# Set credentials via env vars
+export BIGQUERY_CREDS=${BIGQUERY_CREDS:-$CREDENTIALS}
+export REDSHIFT_PASSWORD=${REDSHIFT_PASSWORD:-$CREDENTIALS}
+export SNOWFLAKE_PASSWORD=${SNOWFLAKE_PASSWORD:-$CREDENTIALS}
+
+echo "integration_check: Starting 5 runs"
+
+for i in {1..5}; do
+  echo "integration_check: Starting run $i";
+  bash "$script_path/run_config.sh" -b "${SQL_RUNNER_PATH:-sql-runner}" -c "$config_dir/pre_test.json" -t "$script_path/templates/$DATABASE.yml.tmpl" -v "$variables_template" || exit 1;
+
+  echo "integration_check: Checking actual vs. expected for the events_staged table";
+  bash "$script_path/run_test.sh" -m "$MODEL" -d "$DATABASE" -c "events_staged_integration_test_${i}" || exit 1;
+
+  bash "$script_path/run_config.sh" -b "${SQL_RUNNER_PATH:-sql-runner}" -c "$config_dir/post_test.json" -t "$script_path/templates/$DATABASE.yml.tmpl" -v "$variables_template" || exit 1;
+  echo "integration_check: run $i done";
+done || exit 1
+
+echo "integration_check: Checking actual vs. expected for derived tables";
+bash "$script_path/run_test.sh" -m "$MODEL" -d "$DATABASE" -c perm_integration_test_tables || exit 1;
+
+echo "integration_check: Checking standard tests against derived tables";
+bash "$script_path/run_test.sh" -m "$MODEL" -d "$DATABASE" -c perm_tables || exit 1;
+
+echo "integration_check: Done"
diff --git a/.scripts/pr_check.sh b/.scripts/pr_check.sh
index 675aa917..4104d71d 100755
--- a/.scripts/pr_check.sh
+++ b/.scripts/pr_check.sh
@@ -4,6 +4,7 @@
 # -b (binary) path to sql-runner binary
 # -d (database) target database for expectations
 # -a (auth) optional credentials for database target
+# -m (model) target model to run i.e. web or mobile
 
 while getopts 'b:d:a:m:' v
 do
diff --git a/.scripts/run_config.sh b/.scripts/run_config.sh
index 1fb20f01..18705a1a 100755
--- a/.scripts/run_config.sh
+++ b/.scripts/run_config.sh
@@ -8,17 +8,19 @@
 # -d (dryRun) use sql-runner dry run
 # -o (output path) path to store output of sql-runner to sql file (to be used in conjunction with p)
 # -t (target template) path to target template to use (minimizes risk of credential leak)
+# -v (variables template) path to variables template to use
 
-while getopts 'pdb:c:a:o:t:' v
+while getopts 'pdb:c:a:o:t:v:' opt
 do
-  case $v in
+  case $opt in
     b) SQL_RUNNER_PATH=$OPTARG ;;
     c) CONFIG_PATH=$OPTARG ;;
     a) CREDENTIALS=$OPTARG ;;
     p) FILL_TEMPLATES='-fillTemplates' ;;
     d) DRY_RUN='-dryRun' ;;
     o) OUTPUT_PATH=$OPTARG ;;
-    t) TARGET_TEMPLATE=$OPTARG
+    t) TARGET_TEMPLATE=$OPTARG ;;
+    v) VARIABLES_TEMPLATE=$OPTARG
   esac
 done
 
@@ -72,6 +74,13 @@ do
 
   fi
 
+  if [ ! -z "$VARIABLES_TEMPLATE" ]; then
+
+    # Override playbook variables: between :variables: and :steps:, any line whose key (2nd ':'-separated field) also appears in the template is replaced by the template's line
+    awk -F':' 'NR==FNR{a[$2]=$0;next} /:variables:/{flag=1} /:steps:/{flag=0} a[$2]&&flag{$0=a[$2]}1' "$root_path/$VARIABLES_TEMPLATE" "$root_path/tmp/current_playbook.yml" > "$root_path/tmp/current_playbook.tmp" && mv "$root_path/tmp/current_playbook.tmp" "$root_path/tmp/current_playbook.yml"
+
+  fi
+
   # If printing sql to file, mkdirs and set path vars
   if [ ! -z "$OUTPUT_PATH" ]; then
     mkdir -p $OUTPUT_PATH
diff --git a/.scripts/run_test.sh b/.scripts/run_test.sh
index ce882ffb..2b169379 100755
--- a/.scripts/run_test.sh
+++ b/.scripts/run_test.sh
@@ -4,6 +4,7 @@
 # -d (database) target database for expectations
 # -c (config) expectation config name
 # -a (auth) optional credentials for database target
+# -m (model) target model to run i.e. web or mobile
 
 while getopts 'd:c:a:m:' v
 do
diff --git a/.test/great_expectations/expectations/web/v1/integration_tests.json b/.test/great_expectations/expectations/web/v1/integration_tests.json
new file mode 100644
index 00000000..61c74508
--- /dev/null
+++ b/.test/great_expectations/expectations/web/v1/integration_tests.json
@@ -0,0 +1,69 @@
+{
+  "data_asset_type": "Dataset",
+  "expectation_suite_name": "integration_tests",
+  "expectations": [
+    {
+      "expectation_type": "expect_column_values_to_be_null",
+      "kwargs": {
+        "column": "long_session"
+      }
+    },
+    {
+      "expectation_type": "expect_column_values_to_be_null",
+      "kwargs": {
+        "column": "null_page_view_id"
+      }
+    },
+    {
+      "expectation_type": "expect_column_values_to_be_null",
+      "kwargs": {
+        "column": "null_domain_userid"
+      }
+    },
+    {
+      "expectation_type": "expect_column_values_to_be_null",
+      "kwargs": {
+        "column": "null_domain_sessionid"
+      }
+    },
+    {
+      "expectation_type": "expect_column_values_to_be_null",
+      "kwargs": {
+        "column": "dupe_event_id_same_collector_tstamp"
+      }
+    },
+    {
+      "expectation_type": "expect_column_values_to_be_null",
+      "kwargs": {
+        "column": "dupe_event_id_diff_collector_tstamp"
+      }
+    },
+    {
+      "expectation_type": "expect_column_values_to_be_null",
+      "kwargs": {
+        "column": "dupe_page_view_id_diff_derived_tstamp"
+      }
+    },
+    {
+      "expectation_type": "expect_column_values_to_be_null",
+      "kwargs": {
+        "column": "late_arriving_dvc_created_sent"
+      }
+    },
+    {
+      "expectation_type": "expect_column_values_to_be_null",
+      "kwargs": {
+        "column": "clean_session"
+      }
+    }
+  ],
+  "meta": {
+    "versions": {
+      "test_suite_version": "1.1.0",
+      "bigquery_model_version": "1.0.3"
+    },
+    "__comment__": "expect_column_values_to_be_null on column stray_page_ping has been removed as it is a known issue (https://github.com/snowplow/data-models/issues/92)",
+    "great_expectations.__version__": "0.12.0"
+  }
+}
+
diff --git a/.test/great_expectations/validation_configs/web/v1/bigquery/events_staged_integration_test_1.json b/.test/great_expectations/validation_configs/web/v1/bigquery/events_staged_integration_test_1.json
new file mode 100644
index 00000000..a967ee5d
--- /dev/null
+++ b/.test/great_expectations/validation_configs/web/v1/bigquery/events_staged_integration_test_1.json
@@ -0,0 +1,13 @@
+{
+  "validation_operator_name": "action_list_operator",
+  "batches": [
+    {
+      "batch_kwargs": {
+        "datasource": "bigquery",
+        "query": "WITH expected_hashed AS ( SELECT a AS data, FARM_FINGERPRINT(FORMAT( '%%T', a)) AS h FROM dv_test_data.events_staged_run_1 AS a ), actual_hashed AS ( SELECT b AS data, FARM_FINGERPRINT(FORMAT( '%%T', b)) AS h FROM scratch_dev1.events_staged AS b ), equality_check AS ( SELECT IF(l.h IS NULL, 'New on right', 'New on left') AS Change, IF(l.h IS NULL,r.data,l.data).* FROM expected_hashed l FULL OUTER JOIN actual_hashed r ON l.h = r.h WHERE l.h IS NULL OR r.h IS NULL)SELECT  SUM(CASE WHEN user_id = 'long session' THEN 1 END) AS long_session, SUM(CASE WHEN user_id = 'NULL page_view_id' THEN 1 END) AS null_page_view_id, SUM(CASE WHEN user_id = 'NULL domain_userid' THEN 1 END) AS null_domain_userid, SUM(CASE WHEN user_id = 'NULL domain_sessionid' THEN 1 END) AS null_domain_sessionid, SUM(CASE WHEN user_id = 'dupe: event_id same collector_tstamp' THEN 1 END) AS dupe_event_id_same_collector_tstamp, SUM(CASE WHEN user_id = 'dupe: event_id different collector_tstamp' THEN 1 END) AS dupe_event_id_diff_collector_tstamp, SUM(CASE WHEN user_id = 'dupe: page_view_id same derived_tstamp' THEN 1 END) AS dupe_page_view_id_same_derived_tstamp, SUM(CASE WHEN user_id = 'dupe: page_view_id different derived_tstamp' THEN 1 END) AS dupe_page_view_id_diff_derived_tstamp, SUM(CASE WHEN user_id = 'late arriving: device created/sent >3 days' THEN 1 END) AS late_arriving_dvc_created_sent, SUM(CASE WHEN user_id = 'stray page ping' THEN 1 END) AS stray_page_ping, SUM(CASE WHEN user_id = 'No edge cases' THEN 1 END) AS clean_session FROM equality_check",
+        "bigquery_temp_table": "ge_test_derived_events_staged_integration"
+      },
+      "expectation_suite_names": ["web.v1.integration_tests"]
+    }
+  ]
+}
diff --git a/.test/great_expectations/validation_configs/web/v1/bigquery/events_staged_integration_test_2.json b/.test/great_expectations/validation_configs/web/v1/bigquery/events_staged_integration_test_2.json
new file mode 100644
index 00000000..90bd2188
--- /dev/null
+++ b/.test/great_expectations/validation_configs/web/v1/bigquery/events_staged_integration_test_2.json
@@ -0,0 +1,13 @@
+{
+  "validation_operator_name": "action_list_operator",
+  "batches": [
+    {
+      "batch_kwargs": {
+        "datasource": "bigquery",
+        "query": "WITH expected_hashed AS ( SELECT a AS data, FARM_FINGERPRINT(FORMAT( '%%T', a)) AS h FROM dv_test_data.events_staged_run_2 AS a ), actual_hashed AS ( SELECT b AS data, FARM_FINGERPRINT(FORMAT( '%%T', b)) AS h FROM scratch_dev1.events_staged AS b ), equality_check AS ( SELECT IF(l.h IS NULL, 'New on right', 'New on left') AS Change, IF(l.h IS NULL,r.data,l.data).* FROM expected_hashed l FULL OUTER JOIN actual_hashed r ON l.h = r.h WHERE l.h IS NULL OR r.h IS NULL)SELECT  SUM(CASE WHEN user_id = 'long session' THEN 1 END) AS long_session, SUM(CASE WHEN user_id = 'NULL page_view_id' THEN 1 END) AS null_page_view_id, SUM(CASE WHEN user_id = 'NULL domain_userid' THEN 1 END) AS null_domain_userid, SUM(CASE WHEN user_id = 'NULL domain_sessionid' THEN 1 END) AS null_domain_sessionid, SUM(CASE WHEN user_id = 'dupe: event_id same collector_tstamp' THEN 1 END) AS dupe_event_id_same_collector_tstamp, SUM(CASE WHEN user_id = 'dupe: event_id different collector_tstamp' THEN 1 END) AS dupe_event_id_diff_collector_tstamp, SUM(CASE WHEN user_id = 'dupe: page_view_id same derived_tstamp' THEN 1 END) AS dupe_page_view_id_same_derived_tstamp, SUM(CASE WHEN user_id = 'dupe: page_view_id different derived_tstamp' THEN 1 END) AS dupe_page_view_id_diff_derived_tstamp, SUM(CASE WHEN user_id = 'late arriving: device created/sent >3 days' THEN 1 END) AS late_arriving_dvc_created_sent, SUM(CASE WHEN user_id = 'stray page ping' THEN 1 END) AS stray_page_ping, SUM(CASE WHEN user_id = 'No edge cases' THEN 1 END) AS clean_session FROM equality_check",
+        "bigquery_temp_table": "ge_test_derived_events_staged_integration"
+      },
+      "expectation_suite_names": ["web.v1.integration_tests"]
+    }
+  ]
+}
diff --git a/.test/great_expectations/validation_configs/web/v1/bigquery/events_staged_integration_test_3.json b/.test/great_expectations/validation_configs/web/v1/bigquery/events_staged_integration_test_3.json
new file mode 100644
index 00000000..d055a24a
--- /dev/null
+++ b/.test/great_expectations/validation_configs/web/v1/bigquery/events_staged_integration_test_3.json
@@ -0,0 +1,13 @@
+{
+  "validation_operator_name": "action_list_operator",
+  "batches": [
+    {
+      "batch_kwargs": {
+        "datasource": "bigquery",
+        "query": "WITH expected_hashed AS ( SELECT a AS data, FARM_FINGERPRINT(FORMAT( '%%T', a)) AS h FROM dv_test_data.events_staged_run_3 AS a ), actual_hashed AS ( SELECT b AS data, FARM_FINGERPRINT(FORMAT( '%%T', b)) AS h FROM scratch_dev1.events_staged AS b ), equality_check AS ( SELECT IF(l.h IS NULL, 'New on right', 'New on left') AS Change, IF(l.h IS NULL,r.data,l.data).* FROM expected_hashed l FULL OUTER JOIN actual_hashed r ON l.h = r.h WHERE l.h IS NULL OR r.h IS NULL)SELECT  SUM(CASE WHEN user_id = 'long session' THEN 1 END) AS long_session, SUM(CASE WHEN user_id = 'NULL page_view_id' THEN 1 END) AS null_page_view_id, SUM(CASE WHEN user_id = 'NULL domain_userid' THEN 1 END) AS null_domain_userid, SUM(CASE WHEN user_id = 'NULL domain_sessionid' THEN 1 END) AS null_domain_sessionid, SUM(CASE WHEN user_id = 'dupe: event_id same collector_tstamp' THEN 1 END) AS dupe_event_id_same_collector_tstamp, SUM(CASE WHEN user_id = 'dupe: event_id different collector_tstamp' THEN 1 END) AS dupe_event_id_diff_collector_tstamp, SUM(CASE WHEN user_id = 'dupe: page_view_id same derived_tstamp' THEN 1 END) AS dupe_page_view_id_same_derived_tstamp, SUM(CASE WHEN user_id = 'dupe: page_view_id different derived_tstamp' THEN 1 END) AS dupe_page_view_id_diff_derived_tstamp, SUM(CASE WHEN user_id = 'late arriving: device created/sent >3 days' THEN 1 END) AS late_arriving_dvc_created_sent, SUM(CASE WHEN user_id = 'stray page ping' THEN 1 END) AS stray_page_ping, SUM(CASE WHEN user_id = 'No edge cases' THEN 1 END) AS clean_session FROM equality_check",
+        "bigquery_temp_table": "ge_test_derived_events_staged_integration"
+      },
+      "expectation_suite_names": ["web.v1.integration_tests"]
+    }
+  ]
+}
diff --git a/.test/great_expectations/validation_configs/web/v1/bigquery/events_staged_integration_test_4.json b/.test/great_expectations/validation_configs/web/v1/bigquery/events_staged_integration_test_4.json
new file mode 100644
index 00000000..a6694e74
--- /dev/null
+++ b/.test/great_expectations/validation_configs/web/v1/bigquery/events_staged_integration_test_4.json
@@ -0,0 +1,13 @@
+{
+  "validation_operator_name": "action_list_operator",
+  "batches": [
+    {
+      "batch_kwargs": {
+        "datasource": "bigquery",
+        "query": "WITH expected_hashed AS ( SELECT a AS data, FARM_FINGERPRINT(FORMAT( '%%T', a)) AS h FROM dv_test_data.events_staged_run_4 AS a ), actual_hashed AS ( SELECT b AS data, FARM_FINGERPRINT(FORMAT( '%%T', b)) AS h FROM scratch_dev1.events_staged AS b ), equality_check AS ( SELECT IF(l.h IS NULL, 'New on right', 'New on left') AS Change, IF(l.h IS NULL,r.data,l.data).* FROM expected_hashed l FULL OUTER JOIN actual_hashed r ON l.h = r.h WHERE l.h IS NULL OR r.h IS NULL)SELECT  SUM(CASE WHEN user_id = 'long session' THEN 1 END) AS long_session, SUM(CASE WHEN user_id = 'NULL page_view_id' THEN 1 END) AS null_page_view_id, SUM(CASE WHEN user_id = 'NULL domain_userid' THEN 1 END) AS null_domain_userid, SUM(CASE WHEN user_id = 'NULL domain_sessionid' THEN 1 END) AS null_domain_sessionid, SUM(CASE WHEN user_id = 'dupe: event_id same collector_tstamp' THEN 1 END) AS dupe_event_id_same_collector_tstamp, SUM(CASE WHEN user_id = 'dupe: event_id different collector_tstamp' THEN 1 END) AS dupe_event_id_diff_collector_tstamp, SUM(CASE WHEN user_id = 'dupe: page_view_id same derived_tstamp' THEN 1 END) AS dupe_page_view_id_same_derived_tstamp, SUM(CASE WHEN user_id = 'dupe: page_view_id different derived_tstamp' THEN 1 END) AS dupe_page_view_id_diff_derived_tstamp, SUM(CASE WHEN user_id = 'late arriving: device created/sent >3 days' THEN 1 END) AS late_arriving_dvc_created_sent, SUM(CASE WHEN user_id = 'stray page ping' THEN 1 END) AS stray_page_ping, SUM(CASE WHEN user_id = 'No edge cases' THEN 1 END) AS clean_session FROM equality_check",
+        "bigquery_temp_table": "ge_test_derived_events_staged_integration"
+      },
+      "expectation_suite_names": ["web.v1.integration_tests"]
+    }
+  ]
+}
diff --git a/.test/great_expectations/validation_configs/web/v1/bigquery/events_staged_integration_test_5.json b/.test/great_expectations/validation_configs/web/v1/bigquery/events_staged_integration_test_5.json
new file mode 100644
index 00000000..12028b27
--- /dev/null
+++ b/.test/great_expectations/validation_configs/web/v1/bigquery/events_staged_integration_test_5.json
@@ -0,0 +1,13 @@
+{
+  "validation_operator_name": "action_list_operator",
+  "batches": [
+    {
+      "batch_kwargs": {
+        "datasource": "bigquery",
+        "query": "WITH expected_hashed AS ( SELECT a AS data, FARM_FINGERPRINT(FORMAT( '%%T', a)) AS h FROM dv_test_data.events_staged_run_5 AS a ), actual_hashed AS ( SELECT b AS data, FARM_FINGERPRINT(FORMAT( '%%T', b)) AS h FROM scratch_dev1.events_staged AS b ), equality_check AS ( SELECT IF(l.h IS NULL, 'New on right', 'New on left') AS Change, IF(l.h IS NULL,r.data,l.data).* FROM expected_hashed l FULL OUTER JOIN actual_hashed r ON l.h = r.h WHERE l.h IS NULL OR r.h IS NULL)SELECT  SUM(CASE WHEN user_id = 'long session' THEN 1 END) AS long_session, SUM(CASE WHEN user_id = 'NULL page_view_id' THEN 1 END) AS null_page_view_id, SUM(CASE WHEN user_id = 'NULL domain_userid' THEN 1 END) AS null_domain_userid, SUM(CASE WHEN user_id = 'NULL domain_sessionid' THEN 1 END) AS null_domain_sessionid, SUM(CASE WHEN user_id = 'dupe: event_id same collector_tstamp' THEN 1 END) AS dupe_event_id_same_collector_tstamp, SUM(CASE WHEN user_id = 'dupe: event_id different collector_tstamp' THEN 1 END) AS dupe_event_id_diff_collector_tstamp, SUM(CASE WHEN user_id = 'dupe: page_view_id same derived_tstamp' THEN 1 END) AS dupe_page_view_id_same_derived_tstamp, SUM(CASE WHEN user_id = 'dupe: page_view_id different derived_tstamp' THEN 1 END) AS dupe_page_view_id_diff_derived_tstamp, SUM(CASE WHEN user_id = 'late arriving: device created/sent >3 days' THEN 1 END) AS late_arriving_dvc_created_sent, SUM(CASE WHEN user_id = 'stray page ping' THEN 1 END) AS stray_page_ping, SUM(CASE WHEN user_id = 'No edge cases' THEN 1 END) AS clean_session FROM equality_check",
+        "bigquery_temp_table": "ge_test_derived_events_staged_integration"
+      },
+      "expectation_suite_names": ["web.v1.integration_tests"]
+    }
+  ]
+}
diff --git a/.test/great_expectations/validation_configs/web/v1/bigquery/perm_integration_test_tables.json b/.test/great_expectations/validation_configs/web/v1/bigquery/perm_integration_test_tables.json
new file mode 100644
index 00000000..81190524
--- /dev/null
+++ b/.test/great_expectations/validation_configs/web/v1/bigquery/perm_integration_test_tables.json
@@ -0,0 +1,29 @@
+{
+  "validation_operator_name": "action_list_operator",
+  "batches": [
+    {
+      "batch_kwargs": {
+        "datasource": "bigquery",
+        "query": "WITH expected_hashed AS ( SELECT a AS data, FARM_FINGERPRINT(FORMAT( '%%T', a)) AS h FROM dv_test_data.page_views AS a ), actual_hashed AS ( SELECT b AS data, FARM_FINGERPRINT(FORMAT( '%%T', b)) AS h FROM derived_dev1.page_views AS b ), equality_check AS ( SELECT IF(l.h IS NULL, 'New on right', 'New on left') AS Change, IF(l.h IS NULL,r.data,l.data).* FROM expected_hashed l FULL OUTER JOIN actual_hashed r ON l.h = r.h WHERE l.h IS NULL OR r.h IS NULL)SELECT  SUM(CASE WHEN user_id = 'long session' THEN 1 END) AS long_session, SUM(CASE WHEN user_id = 'NULL page_view_id' THEN 1 END) AS null_page_view_id, SUM(CASE WHEN user_id = 'NULL domain_userid' THEN 1 END) AS null_domain_userid, SUM(CASE WHEN user_id = 'NULL domain_sessionid' THEN 1 END) AS null_domain_sessionid, SUM(CASE WHEN user_id = 'dupe: event_id same collector_tstamp' THEN 1 END) AS dupe_event_id_same_collector_tstamp, SUM(CASE WHEN user_id = 'dupe: event_id different collector_tstamp' THEN 1 END) AS dupe_event_id_diff_collector_tstamp, SUM(CASE WHEN user_id = 'dupe: page_view_id same derived_tstamp' THEN 1 END) AS dupe_page_view_id_same_derived_tstamp, SUM(CASE WHEN user_id = 'dupe: page_view_id different derived_tstamp' THEN 1 END) AS dupe_page_view_id_diff_derived_tstamp, SUM(CASE WHEN user_id = 'late arriving: device created/sent >3 days' THEN 1 END) AS late_arriving_dvc_created_sent, SUM(CASE WHEN user_id = 'stray page ping' THEN 1 END) AS stray_page_ping, SUM(CASE WHEN user_id = 'No edge cases' THEN 1 END) AS clean_session FROM equality_check",
+        "bigquery_temp_table": "ge_test_derived_pv_integration"
+      },
+      "expectation_suite_names": ["web.v1.integration_tests"]
+    },
+    {
+      "batch_kwargs": {
+        "datasource": "bigquery",
+        "query": "WITH expected_hashed AS ( SELECT a AS data, FARM_FINGERPRINT(FORMAT( '%%T', a)) AS h FROM dv_test_data.sessions AS a ), actual_hashed AS ( SELECT b AS data, FARM_FINGERPRINT(FORMAT( '%%T', b)) AS h FROM derived_dev1.sessions AS b ), equality_check AS ( SELECT IF(l.h IS NULL, 'New on right', 'New on left') AS Change, IF(l.h IS NULL,r.data,l.data).* FROM expected_hashed l FULL OUTER JOIN actual_hashed r ON l.h = r.h WHERE l.h IS NULL OR r.h IS NULL)SELECT  SUM(CASE WHEN user_id = 'long session' THEN 1 END) AS long_session, SUM(CASE WHEN user_id = 'NULL page_view_id' THEN 1 END) AS null_page_view_id, SUM(CASE WHEN user_id = 'NULL domain_userid' THEN 1 END) AS null_domain_userid, SUM(CASE WHEN user_id = 'NULL domain_sessionid' THEN 1 END) AS null_domain_sessionid, SUM(CASE WHEN user_id = 'dupe: event_id same collector_tstamp' THEN 1 END) AS dupe_event_id_same_collector_tstamp, SUM(CASE WHEN user_id = 'dupe: event_id different collector_tstamp' THEN 1 END) AS dupe_event_id_diff_collector_tstamp, SUM(CASE WHEN user_id = 'dupe: page_view_id same derived_tstamp' THEN 1 END) AS dupe_page_view_id_same_derived_tstamp, SUM(CASE WHEN user_id = 'dupe: page_view_id different derived_tstamp' THEN 1 END) AS dupe_page_view_id_diff_derived_tstamp, SUM(CASE WHEN user_id = 'late arriving: device created/sent >3 days' THEN 1 END) AS late_arriving_dvc_created_sent, SUM(CASE WHEN user_id = 'stray page ping' THEN 1 END) AS stray_page_ping, SUM(CASE WHEN user_id = 'No edge cases' THEN 1 END) AS clean_session FROM equality_check",
+        "bigquery_temp_table": "ge_test_derived_sessions_integration"
+      },
+      "expectation_suite_names": ["web.v1.integration_tests"]
+    },
+    {
+      "batch_kwargs": {
+        "datasource": "bigquery",
+        "query": "WITH expected_hashed AS ( SELECT a AS data, FARM_FINGERPRINT(FORMAT( '%%T', a)) AS h FROM dv_test_data.users AS a ), actual_hashed AS ( SELECT b AS data, FARM_FINGERPRINT(FORMAT( '%%T', b)) AS h FROM derived_dev1.users AS b ), equality_check AS ( SELECT IF(l.h IS NULL, 'New on right', 'New on left') AS Change, IF(l.h IS NULL,r.data,l.data).* FROM expected_hashed l FULL OUTER JOIN actual_hashed r ON l.h = r.h WHERE l.h IS NULL OR r.h IS NULL)SELECT  SUM(CASE WHEN user_id = 'long session' THEN 1 END) AS long_session, SUM(CASE WHEN user_id = 'NULL page_view_id' THEN 1 END) AS null_page_view_id, SUM(CASE WHEN user_id = 'NULL domain_userid' THEN 1 END) AS null_domain_userid, SUM(CASE WHEN user_id = 'NULL domain_sessionid' THEN 1 END) AS null_domain_sessionid, SUM(CASE WHEN user_id = 'dupe: event_id same collector_tstamp' THEN 1 END) AS dupe_event_id_same_collector_tstamp, SUM(CASE WHEN user_id = 'dupe: event_id different collector_tstamp' THEN 1 END) AS dupe_event_id_diff_collector_tstamp, SUM(CASE WHEN user_id = 'dupe: page_view_id same derived_tstamp' THEN 1 END) AS dupe_page_view_id_same_derived_tstamp, SUM(CASE WHEN user_id = 'dupe: page_view_id different derived_tstamp' THEN 1 END) AS dupe_page_view_id_diff_derived_tstamp, SUM(CASE WHEN user_id = 'late arriving: device created/sent >3 days' THEN 1 END) AS late_arriving_dvc_created_sent, SUM(CASE WHEN user_id = 'stray page ping' THEN 1 END) AS stray_page_ping, SUM(CASE WHEN user_id = 'No edge cases' THEN 1 END) AS clean_session FROM equality_check",
+        "bigquery_temp_table": "ge_test_derived_users_integration"
+      },
+      "expectation_suite_names": ["web.v1.integration_tests"]
+    }
+  ]
+}
diff --git a/.test/integration_tests/web/v1/bigquery_variables.yml.tmpl b/.test/integration_tests/web/v1/bigquery_variables.yml.tmpl
new file mode 100644
index 00000000..1eab52a4
--- /dev/null
+++ b/.test/integration_tests/web/v1/bigquery_variables.yml.tmpl
@@ -0,0 +1,8 @@
+:variables:
+  :input_schema:        dv_test_data
+  :scratch_schema:      scratch_dev1
+  :output_schema:       derived_dev1
+  :entropy:             ""
+  :start_date:          2021-03-01
+  :update_cadence_days: 1
+:step:

From 0be98b51b0c8353b810fb94958f85ba498fb6343 Mon Sep 17 00:00:00 2001
From: bill-warner <71764352+bill-warner@users.noreply.github.com>
Date: Thu, 27 May 2021 10:10:51 +0300
Subject: [PATCH 2/2] Prepare for release

---
 .test/great_expectations/expectations/web/v1/base.json          | 2 +-
 .test/great_expectations/expectations/web/v1/base_redshift.json | 2 +-
 .../expectations/web/v1/integration_tests.json                  | 2 +-
 .test/great_expectations/expectations/web/v1/metadata.json      | 2 +-
 .../expectations/web/v1/page_view_in_session_values.json        | 2 +-
 .test/great_expectations/expectations/web/v1/page_views.json    | 2 +-
 .test/great_expectations/expectations/web/v1/sessions.json      | 2 +-
 .test/great_expectations/expectations/web/v1/users.json         | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/.test/great_expectations/expectations/web/v1/base.json b/.test/great_expectations/expectations/web/v1/base.json
index 79390b6a..89b68216 100644
--- a/.test/great_expectations/expectations/web/v1/base.json
+++ b/.test/great_expectations/expectations/web/v1/base.json
@@ -137,7 +137,7 @@
   ],
   "meta": {
     "versions": {
-      "test_suite_version": "1.1.0",
+      "test_suite_version": "1.1.1",
       "bigquery_model_version": "1.0.3",
       "snowflake_model_version": "1.0.0"
     },
diff --git a/.test/great_expectations/expectations/web/v1/base_redshift.json b/.test/great_expectations/expectations/web/v1/base_redshift.json
index c4ec971c..9e87a41d 100644
--- a/.test/great_expectations/expectations/web/v1/base_redshift.json
+++ b/.test/great_expectations/expectations/web/v1/base_redshift.json
@@ -285,7 +285,7 @@
   ],
   "meta": {
     "versions": {
-      "test_suite_version": "1.1.0",
+      "test_suite_version": "1.1.1",
       "redshift_model_version": "1.2.0"
     },
     "great_expectations.__version__": "0.12.0"
diff --git a/.test/great_expectations/expectations/web/v1/integration_tests.json b/.test/great_expectations/expectations/web/v1/integration_tests.json
index 61c74508..ca6d9bfc 100644
--- a/.test/great_expectations/expectations/web/v1/integration_tests.json
+++ b/.test/great_expectations/expectations/web/v1/integration_tests.json
@@ -59,7 +59,7 @@
   ],
   "meta": {
     "versions": {
-      "test_suite_version": "1.1.0",
+      "test_suite_version": "1.1.1",
       "bigquery_model_version": "1.0.3"
     },
     "__comment__": "expect_column_values_to_be_null on column stray_page_ping has been removed as it is a known issue (https://github.com/snowplow/data-models/issues/92)",
diff --git a/.test/great_expectations/expectations/web/v1/metadata.json b/.test/great_expectations/expectations/web/v1/metadata.json
index 3117826a..cdc76817 100644
--- a/.test/great_expectations/expectations/web/v1/metadata.json
+++ b/.test/great_expectations/expectations/web/v1/metadata.json
@@ -102,7 +102,7 @@
   ],
   "meta": {
     "versions": {
-      "test_suite_version": "1.1.0",
+      "test_suite_version": "1.1.1",
       "redshift_model_version": "1.2.0",
       "bigquery_model_version": "1.0.3",
       "snowflake_model_version": "1.0.0"
diff --git a/.test/great_expectations/expectations/web/v1/page_view_in_session_values.json b/.test/great_expectations/expectations/web/v1/page_view_in_session_values.json
index 294bbfa5..996cd0d2 100644
--- a/.test/great_expectations/expectations/web/v1/page_view_in_session_values.json
+++ b/.test/great_expectations/expectations/web/v1/page_view_in_session_values.json
@@ -26,7 +26,7 @@
   ],
   "meta": {
     "versions": {
-      "test_suite_version": "1.1.0",
+      "test_suite_version": "1.1.1",
       "redshift_model_version": "1.2.0",
       "bigquery_model_version": "1.0.3",
       "snowflake_model_version": "1.0.0"
diff --git a/.test/great_expectations/expectations/web/v1/page_views.json b/.test/great_expectations/expectations/web/v1/page_views.json
index 57ef3902..b5f194ed 100644
--- a/.test/great_expectations/expectations/web/v1/page_views.json
+++ b/.test/great_expectations/expectations/web/v1/page_views.json
@@ -224,7 +224,7 @@
   ],
   "meta": {
     "versions": {
-      "test_suite_version": "1.1.0",
+      "test_suite_version": "1.1.1",
       "redshift_model_version": "1.2.0",
       "bigquery_model_version": "1.0.3",
       "snowflake_model_version": "1.0.0"
diff --git a/.test/great_expectations/expectations/web/v1/sessions.json b/.test/great_expectations/expectations/web/v1/sessions.json
index 908b78e0..b0234180 100644
--- a/.test/great_expectations/expectations/web/v1/sessions.json
+++ b/.test/great_expectations/expectations/web/v1/sessions.json
@@ -180,7 +180,7 @@
   ],
   "meta": {
     "versions": {
-      "test_suite_version": "1.1.0",
+      "test_suite_version": "1.1.1",
       "redshift_model_version": "1.2.0",
       "bigquery_model_version": "1.0.3",
       "snowflake_model_version": "1.0.0"
diff --git a/.test/great_expectations/expectations/web/v1/users.json b/.test/great_expectations/expectations/web/v1/users.json
index f25db4bf..b4d2fe6e 100644
--- a/.test/great_expectations/expectations/web/v1/users.json
+++ b/.test/great_expectations/expectations/web/v1/users.json
@@ -116,7 +116,7 @@
   ],
   "meta": {
     "versions": {
-      "test_suite_version": "1.1.0",
+      "test_suite_version": "1.1.1",
       "redshift_model_version": "1.2.0",
       "bigquery_model_version": "1.0.3",
       "snowflake_model_version": "1.0.0"