From e656563fa502836db023190604d7d0121e88f39a Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 31 Jul 2024 23:15:36 -0700 Subject: [PATCH 01/21] adding initial version of table dump --- .../blueprints/client/api2.py | 21 ++------ .../blueprints/client/utils.py | 20 +++++++ .../blueprints/materialize/api.py | 54 +++++++++++++++++++ 3 files changed, 79 insertions(+), 16 deletions(-) diff --git a/materializationengine/blueprints/client/api2.py b/materializationengine/blueprints/client/api2.py index aa43277a..95a46b6b 100644 --- a/materializationengine/blueprints/client/api2.py +++ b/materializationengine/blueprints/client/api2.py @@ -32,6 +32,7 @@ from materializationengine.blueprints.client.utils import ( create_query_response, collect_crud_columns, + get_latest_version, ) from materializationengine.blueprints.client.schemas import ( ComplexQuerySchema, @@ -1868,24 +1869,12 @@ def get( datastack_name ) + if version == -1: + version = get_latest_version(datastack_name) + print(f"using version {version}") + mat_db_name = f"{datastack_name}__mat{version}" if version == 0: mat_db_name = f"{aligned_volume_name}" - elif version == -1: - mat_db_name = f"{aligned_volume_name}" - session = sqlalchemy_cache.get(mat_db_name) - # query the database for the latest valid version - response = ( - session.query(AnalysisVersion) - .filter(AnalysisVersion.datastack == datastack_name) - .filter(AnalysisVersion.valid) - .order_by(AnalysisVersion.time_stamp.desc()) - .first() - ) - version = response.version - print(f"using version {version}") - mat_db_name = f"{datastack_name}__mat{version}" - else: - mat_db_name = f"{datastack_name}__mat{version}" df, column_names, warnings = assemble_view_dataframe( datastack_name, version, view_name, {}, {} diff --git a/materializationengine/blueprints/client/utils.py b/materializationengine/blueprints/client/utils.py index cdfee6a1..7992abaf 100644 --- a/materializationengine/blueprints/client/utils.py +++ b/materializationengine/blueprints/client/utils.py @@ -3,6 +3,9 @@ from cloudfiles import compression from io import BytesIO +from materializationengine.info_client import get_datastack_info +from materializationengine.database import sqlalchemy_cache + def collect_crud_columns(column_names): crud_columns = [] @@ -59,6 +62,23 @@ def update_notice_text_warnings(ann_md, warnings, table_name): return warnings +def get_latest_version(datastack_name): + aligned_volume_name = get_datastack_info(datastack_name)["aligned_volume"]["name"] + session = sqlalchemy_cache.get(aligned_volume_name) + # query the database for the latest valid version + response = ( + session.query(AnalysisVersion) + .filter(AnalysisVersion.datastack == datastack_name) + .filter(AnalysisVersion.valid) + .order_by(AnalysisVersion.time_stamp.desc()) + .first() + ) + if response is None: + return None + else: + return response.version + + def create_query_response( df, warnings, diff --git a/materializationengine/blueprints/materialize/api.py b/materializationengine/blueprints/materialize/api.py index 00f88003..b05caaba 100644 --- a/materializationengine/blueprints/materialize/api.py +++ b/materializationengine/blueprints/materialize/api.py @@ -5,6 +5,7 @@ from flask import abort, current_app, request from flask_accepts import accepts from flask_restx import Namespace, Resource, inputs, reqparse +from materializationengine.blueprints.client.utils import get_latest_version from materializationengine.blueprints.reset_auth import reset_auth from materializationengine.database import ( create_session, @@ -24,6 +25,8 @@ from sqlalchemy.engine.url import make_url from sqlalchemy.exc import NoSuchTableError from materializationengine.utils import check_write_permission +import os +import cloudfiles from materializationengine.blueprints.materialize.schemas import ( @@ -396,6 +399,57 @@ def post(self, datastack_name: str): return 200 +@mat_bp.route( + "/materialize/run/dump_csv_table/datastack//version//table_name//" +) +class DumpTableToBucketAsCSV(Resource): + @reset_auth + @auth_requires_admin + @mat_bp.doc("Take table or view and dump it to a bucket as csv", security="apikey") + def post(self, datastack_name: str, version: int, table_name: str): + """Dump table to bucket as csv + + Args: + datastack_name (str): name of datastack from infoservice + version (int): version of datastack + table_name (str): name of table or view to dump + """ + mat_db_name = f"{datastack_name}__mat{version}" + + # TODO: add validation of parameters + sql_instance_name = current_app.config.get("SQL_INSTANCE_NAME", None) + if not sql_instance_name: + return 500, "SQL_INSTANCE_NAME not set in app config" + + bucket = current_app.config.get("MATERIALIZATION_DUMP_BUCKET", None) + if not bucket: + return 500, "MATERIALIZATION_DUMP_BUCKET not set in app config" + + if version == -1: + version = get_latest_version(datastack_name) + + cf = cloudfiles.CloudFiles(bucket) + filename = f"{datastack_name}/v{version}/{table_name}.csv.gz" + + cloudpath = os.path.join(bucket, filename) + + # check if the file already exists + if cf.exists(filename): + return 200, "file already exists, nothing to do here" + else: + # run a gcloud command to select * from table and write it to disk as a csv + export_command = f"gcloud sql export csv {sql_instance_name} \ + {cloudpath} --database {mat_db_name} --async \ + --query='select * from {table_name}'" + # run this command and capture the stdout and return code + return_code = os.system(export_command) + + if return_code != 0: + return 500, f"file failed to create using: {export_command}" + else: + return 200, "file is being created at {cloudpath}" + + @mat_bp.route("/materialize/run/update_database/datastack/") class UpdateLiveDatabaseResource(Resource): @reset_auth From 0fc7c38749d91f5acfa2bacda566b479a79b4990 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Thu, 1 Aug 2024 00:33:58 -0700 Subject: [PATCH 02/21] fix missing import --- materializationengine/blueprints/client/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/materializationengine/blueprints/client/utils.py b/materializationengine/blueprints/client/utils.py index 7992abaf..8fed61de 100644 --- a/materializationengine/blueprints/client/utils.py +++ b/materializationengine/blueprints/client/utils.py @@ -5,6 +5,7 @@ from materializationengine.info_client import get_datastack_info from materializationengine.database import sqlalchemy_cache +from dynamicannotationdb.models import AnalysisVersion def collect_crud_columns(column_names): From 46c6c338120563c213fe9b4fa6527fd7bc603c59 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Thu, 1 Aug 2024 00:46:27 -0700 Subject: [PATCH 03/21] installing gcloud sdk --- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index 95355a7a..02324fdb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,6 +7,8 @@ RUN mkdir -p /home/nginx/.cloudvolume/secrets \ COPY requirements.txt /app/. RUN python -m pip install --upgrade pip RUN pip install -r requirements.txt +RUN curl -sSL https://sdk.cloud.google.com | bash +ENV PATH $PATH:/root/google-cloud-sdk/bin COPY . /app COPY override/timeout.conf /etc/nginx/conf.d/timeout.conf COPY gracefully_shutdown_celery.sh /home/nginx From 687e0dbc1c4fee5d35acea65d298e267e565bcb5 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Thu, 1 Aug 2024 00:53:08 -0700 Subject: [PATCH 04/21] trying to install as nginx --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 02324fdb..ba7a03ff 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,7 +7,7 @@ RUN mkdir -p /home/nginx/.cloudvolume/secrets \ COPY requirements.txt /app/. RUN python -m pip install --upgrade pip RUN pip install -r requirements.txt -RUN curl -sSL https://sdk.cloud.google.com | bash +RUN sudo --user=nginx curl -sSL https://sdk.cloud.google.com | bash ENV PATH $PATH:/root/google-cloud-sdk/bin COPY . /app COPY override/timeout.conf /etc/nginx/conf.d/timeout.conf From 7cf6017bd918f4abb1bf4138477671943aba325d Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Thu, 1 Aug 2024 01:01:36 -0700 Subject: [PATCH 05/21] add error handling --- .../blueprints/materialize/api.py | 29 +++++++++++++++---- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/materializationengine/blueprints/materialize/api.py b/materializationengine/blueprints/materialize/api.py index b05caaba..da8b1f4c 100644 --- a/materializationengine/blueprints/materialize/api.py +++ b/materializationengine/blueprints/materialize/api.py @@ -26,6 +26,7 @@ from sqlalchemy.exc import NoSuchTableError from materializationengine.utils import check_write_permission import os +import subprocess import cloudfiles @@ -438,14 +439,30 @@ def post(self, datastack_name: str, version: int, table_name: str): return 200, "file already exists, nothing to do here" else: # run a gcloud command to select * from table and write it to disk as a csv - export_command = f"gcloud sql export csv {sql_instance_name} \ - {cloudpath} --database {mat_db_name} --async \ - --query='select * from {table_name}'" + export_command = [ + "gcloud", + "sql", + "export", + "csv", + sql_instance_name, + cloudpath, + "--database", + mat_db_name, + "--async", + f"--query=select * from {table_name}", + ] + process = subprocess.Popen( + export_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + stdout, stderr = process.communicate() # run this command and capture the stdout and return code - return_code = os.system(export_command) - + return_code = process.returncode if return_code != 0: - return 500, f"file failed to create using: {export_command}" + return ( + 500, + f"file failed to create using: {export_command}. \ + Error: {stderr.decode()} stdout: {stdout.decode()}", + ) else: return 200, "file is being created at {cloudpath}" From 1ca0d96cab8851c3c85208aa3e3e57ef8ad60c84 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Thu, 1 Aug 2024 02:27:08 -0700 Subject: [PATCH 06/21] try new docker --- Dockerfile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index ba7a03ff..9988c7ea 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,8 +7,11 @@ RUN mkdir -p /home/nginx/.cloudvolume/secrets \ COPY requirements.txt /app/. RUN python -m pip install --upgrade pip RUN pip install -r requirements.txt -RUN sudo --user=nginx curl -sSL https://sdk.cloud.google.com | bash +# Install gcloud SDK as root and set permissions +RUN curl -sSL https://sdk.cloud.google.com | bash ENV PATH $PATH:/root/google-cloud-sdk/bin +# Change ownership of gcloud installation to nginx user +RUN chown -R nginx:nginx /root/google-cloud-sdk COPY . /app COPY override/timeout.conf /etc/nginx/conf.d/timeout.conf COPY gracefully_shutdown_celery.sh /home/nginx From e2cdaf58b60dc690673c39bd08d077fd325d7cf3 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Thu, 1 Aug 2024 11:38:37 -0400 Subject: [PATCH 07/21] trying to fix gcloud install --- Dockerfile | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 9988c7ea..3da2c975 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,8 +8,14 @@ COPY requirements.txt /app/. RUN python -m pip install --upgrade pip RUN pip install -r requirements.txt # Install gcloud SDK as root and set permissions +# Install gcloud SDK as root RUN curl -sSL https://sdk.cloud.google.com | bash -ENV PATH $PATH:/root/google-cloud-sdk/bin + +# Change ownership of gcloud installation to nginx user +RUN chown -R nginx:nginx /root/google-cloud-sdk +# Set the PATH for the nginx user +ENV PATH /home/nginx/google-cloud-sdk/bin:/root/google-cloud-sdk/bin:$PATH + # Change ownership of gcloud installation to nginx user RUN chown -R nginx:nginx /root/google-cloud-sdk COPY . /app From aa8a963f901223acb1b58e9ba4397fc91332135f Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 20 Aug 2024 09:13:29 -0700 Subject: [PATCH 08/21] change docker build --- Dockerfile | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 3da2c975..dc23a805 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,12 +9,10 @@ RUN python -m pip install --upgrade pip RUN pip install -r requirements.txt # Install gcloud SDK as root and set permissions # Install gcloud SDK as root -RUN curl -sSL https://sdk.cloud.google.com | bash # Change ownership of gcloud installation to nginx user RUN chown -R nginx:nginx /root/google-cloud-sdk # Set the PATH for the nginx user -ENV PATH /home/nginx/google-cloud-sdk/bin:/root/google-cloud-sdk/bin:$PATH # Change ownership of gcloud installation to nginx user RUN chown -R nginx:nginx /root/google-cloud-sdk @@ -24,4 +22,8 @@ COPY gracefully_shutdown_celery.sh /home/nginx RUN chmod +x /home/nginx/gracefully_shutdown_celery.sh RUN mkdir -p /home/nginx/tmp/shutdown RUN chmod +x /entrypoint.sh -WORKDIR /app \ No newline at end of file +WORKDIR /app +USER nginx +RUN curl -sSL https://sdk.cloud.google.com | bash +ENV PATH /home/nginx/google-cloud-sdk/bin:/root/google-cloud-sdk/bin:$PATH +USER root From 5b51a85a8fa0abf926b755ee4dcd09f01f97a36f Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 20 Aug 2024 09:17:41 -0700 Subject: [PATCH 09/21] docker gcloud fix --- Dockerfile | 7 ------- 1 file changed, 7 deletions(-) diff --git a/Dockerfile b/Dockerfile index dc23a805..728a6880 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,13 +9,6 @@ RUN python -m pip install --upgrade pip RUN pip install -r requirements.txt # Install gcloud SDK as root and set permissions # Install gcloud SDK as root - -# Change ownership of gcloud installation to nginx user -RUN chown -R nginx:nginx /root/google-cloud-sdk -# Set the PATH for the nginx user - -# Change ownership of gcloud installation to nginx user -RUN chown -R nginx:nginx /root/google-cloud-sdk COPY . /app COPY override/timeout.conf /etc/nginx/conf.d/timeout.conf COPY gracefully_shutdown_celery.sh /home/nginx From 912fb53d787c2df5017543f6992eca55c5737084 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 20 Aug 2024 09:50:26 -0700 Subject: [PATCH 10/21] fix query syntax --- materializationengine/blueprints/materialize/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/materializationengine/blueprints/materialize/api.py b/materializationengine/blueprints/materialize/api.py index da8b1f4c..8b0313cd 100644 --- a/materializationengine/blueprints/materialize/api.py +++ b/materializationengine/blueprints/materialize/api.py @@ -449,7 +449,7 @@ def post(self, datastack_name: str, version: int, table_name: str): "--database", mat_db_name, "--async", - f"--query=select * from {table_name}", + f"--query='select * from {table_name}'", ] process = subprocess.Popen( export_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE From 71c62cd05bcc6a22208d04e1384dfd693124ec45 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 20 Aug 2024 10:49:56 -0700 Subject: [PATCH 11/21] adding service account activation --- .../blueprints/materialize/api.py | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/materializationengine/blueprints/materialize/api.py b/materializationengine/blueprints/materialize/api.py index 8b0313cd..185eb653 100644 --- a/materializationengine/blueprints/materialize/api.py +++ b/materializationengine/blueprints/materialize/api.py @@ -438,6 +438,27 @@ def post(self, datastack_name: str, version: int, table_name: str): if cf.exists(filename): return 200, "file already exists, nothing to do here" else: + # run a gcloud command to activate the service account for gcloud + activate_command = [ + "gcloud", + "auth", + "activate-service-account", + "--key-file", + os.environ.get("GOOGLE_APPLICATION_CREDENTIALS"), + ] + process = subprocess.Popen( + activate_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + stdout, stderr = process.communicate() + # run this command and capture the stdout and return code + return_code = process.returncode + if return_code != 0: + return ( + 500, + f"failed to activate service account using \ + {activate_command}. Error: {stderr.decode()} stdout: {stdout.decode()}", + ) + # run a gcloud command to select * from table and write it to disk as a csv export_command = [ "gcloud", From 32457aee175e8dabb33c9645a828d4edaa351e9d Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 20 Aug 2024 12:11:08 -0700 Subject: [PATCH 12/21] fixing command --- materializationengine/blueprints/materialize/api.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/materializationengine/blueprints/materialize/api.py b/materializationengine/blueprints/materialize/api.py index 185eb653..792fba23 100644 --- a/materializationengine/blueprints/materialize/api.py +++ b/materializationengine/blueprints/materialize/api.py @@ -470,8 +470,9 @@ def post(self, datastack_name: str, version: int, table_name: str): "--database", mat_db_name, "--async", - f"--query='select * from {table_name}'", + f'--query="SELECT * from {table_name}"', ] + print(export_command) process = subprocess.Popen( export_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) From 09a24bac9d254b62952bea794ca16aa8f3e0781d Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 20 Aug 2024 13:24:37 -0700 Subject: [PATCH 13/21] fix export syntax --- materializationengine/blueprints/materialize/api.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/materializationengine/blueprints/materialize/api.py b/materializationengine/blueprints/materialize/api.py index 792fba23..5272eaca 100644 --- a/materializationengine/blueprints/materialize/api.py +++ b/materializationengine/blueprints/materialize/api.py @@ -470,7 +470,8 @@ def post(self, datastack_name: str, version: int, table_name: str): "--database", mat_db_name, "--async", - f'--query="SELECT * from {table_name}"', + "--query", + f"SELECT * from {table_name}", ] print(export_command) process = subprocess.Popen( From b5123bbc760a44104f83e1e2eec679fac999e437 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 23 Aug 2024 08:12:32 -0700 Subject: [PATCH 14/21] improved dump --- .../blueprints/materialize/api.py | 65 ++++++++++++++----- 1 file changed, 50 insertions(+), 15 deletions(-) diff --git a/materializationengine/blueprints/materialize/api.py b/materializationengine/blueprints/materialize/api.py index 5272eaca..9b3c93a1 100644 --- a/materializationengine/blueprints/materialize/api.py +++ b/materializationengine/blueprints/materialize/api.py @@ -400,6 +400,11 @@ def post(self, datastack_name: str): return 200 +response_model = api.model( + "Response", {"message": fields.String(description="Response message")} +) + + @mat_bp.route( "/materialize/run/dump_csv_table/datastack//version//table_name//" ) @@ -407,6 +412,8 @@ class DumpTableToBucketAsCSV(Resource): @reset_auth @auth_requires_admin @mat_bp.doc("Take table or view and dump it to a bucket as csv", security="apikey") + @api.response(200, "Success", response_model) + @api.response(500, "Internal Server Error", response_model) def post(self, datastack_name: str, version: int, table_name: str): """Dump table to bucket as csv @@ -420,11 +427,11 @@ def post(self, datastack_name: str, version: int, table_name: str): # TODO: add validation of parameters sql_instance_name = current_app.config.get("SQL_INSTANCE_NAME", None) if not sql_instance_name: - return 500, "SQL_INSTANCE_NAME not set in app config" + return {"message": "SQL_INSTANCE_NAME not set in app config"}, 500 bucket = current_app.config.get("MATERIALIZATION_DUMP_BUCKET", None) if not bucket: - return 500, "MATERIALIZATION_DUMP_BUCKET not set in app config" + return {"message": "MATERIALIZATION_DUMP_BUCKET not set in app config"}, 500 if version == -1: version = get_latest_version(datastack_name) @@ -436,7 +443,9 @@ def post(self, datastack_name: str, version: int, table_name: str): # check if the file already exists if cf.exists(filename): - return 200, "file already exists, nothing to do here" + # return a flask respoonse 200 message that says that the file already exitss + return {"message": f"file already exists at {cloudpath}"}, 200 + else: # run a gcloud command to activate the service account for gcloud activate_command = [ @@ -453,11 +462,9 @@ def post(self, datastack_name: str, version: int, table_name: str): # run this command and capture the stdout and return code return_code = process.returncode if return_code != 0: - return ( - 500, - f"failed to activate service account using \ - {activate_command}. Error: {stderr.decode()} stdout: {stdout.decode()}", - ) + return { + "message": f"failed to activate service account using {activate_command}. Error: {stderr.decode()} stdout: {stdout.decode()}" + }, 500 # run a gcloud command to select * from table and write it to disk as a csv export_command = [ @@ -473,7 +480,7 @@ def post(self, datastack_name: str, version: int, table_name: str): "--query", f"SELECT * from {table_name}", ] - print(export_command) + process = subprocess.Popen( export_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) @@ -481,13 +488,41 @@ def post(self, datastack_name: str, version: int, table_name: str): # run this command and capture the stdout and return code return_code = process.returncode if return_code != 0: - return ( - 500, - f"file failed to create using: {export_command}. \ - Error: {stderr.decode()} stdout: {stdout.decode()}", - ) + return { + "message": f"file failed to create using: {export_command}. Error: {stderr.decode()} stdout: {stdout.decode()}" + }, 500 + header_file = f"{datastack_name}/v{version}/{table_name}_header.csv" + header_cloudpath = os.path.join(bucket, header_file) + + header_command = [ + "gcloud", + "sql", + "export", + "csv", + sql_instance_name, + header_cloudpath, + "--database", + mat_db_name, + "--async", + "--query", + f"SELECT column_name, data_type from INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{table_name}'", + ] + process = subprocess.Popen( + header_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + stdout, stderr = process.communicate() + # run this command and capture the stdout and return code + return_code = process.returncode + if return_code != 0: + return { + "message": f"header file failed to create using:\ + {header_command}. Error: {stderr.decode()} stdout: {stdout.decode()}" + }, 500 + else: - return 200, "file is being created at {cloudpath}" + return { + "message": f"file created at {cloudpath}, header at {header_cloudpath}" + }, 200 @mat_bp.route("/materialize/run/update_database/datastack/") From adf2819263b07847ab0d66a38e09d00b2a27dc8e Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 23 Aug 2024 09:01:07 -0700 Subject: [PATCH 15/21] typo fix --- materializationengine/blueprints/materialize/api.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/materializationengine/blueprints/materialize/api.py b/materializationengine/blueprints/materialize/api.py index 9b3c93a1..95233f89 100644 --- a/materializationengine/blueprints/materialize/api.py +++ b/materializationengine/blueprints/materialize/api.py @@ -400,7 +400,7 @@ def post(self, datastack_name: str): return 200 -response_model = api.model( +response_model = mat_bp.model( "Response", {"message": fields.String(description="Response message")} ) @@ -412,8 +412,8 @@ class DumpTableToBucketAsCSV(Resource): @reset_auth @auth_requires_admin @mat_bp.doc("Take table or view and dump it to a bucket as csv", security="apikey") - @api.response(200, "Success", response_model) - @api.response(500, "Internal Server Error", response_model) + @mat_bp.response(200, "Success", response_model) + @mat_bp.response(500, "Internal Server Error", response_model) def post(self, datastack_name: str, version: int, table_name: str): """Dump table to bucket as csv From 60faa6ea32bb28fe882dff82422735bda88c2fe8 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 23 Aug 2024 10:02:25 -0700 Subject: [PATCH 16/21] missing import --- materializationengine/blueprints/materialize/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/materializationengine/blueprints/materialize/api.py b/materializationengine/blueprints/materialize/api.py index 95233f89..c22e5d99 100644 --- a/materializationengine/blueprints/materialize/api.py +++ b/materializationengine/blueprints/materialize/api.py @@ -4,7 +4,7 @@ from dynamicannotationdb.models import AnalysisTable, Base from flask import abort, current_app, request from flask_accepts import accepts -from flask_restx import Namespace, Resource, inputs, reqparse +from flask_restx import Namespace, Resource, inputs, reqparse, fields from materializationengine.blueprints.client.utils import get_latest_version from materializationengine.blueprints.reset_auth import reset_auth from materializationengine.database import ( From c14d51fab9390b54872ea6499379f0fb47d98347 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 23 Aug 2024 10:55:55 -0700 Subject: [PATCH 17/21] switch order of header and csv --- .../blueprints/materialize/api.py | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/materializationengine/blueprints/materialize/api.py b/materializationengine/blueprints/materialize/api.py index c22e5d99..bd5c739d 100644 --- a/materializationengine/blueprints/materialize/api.py +++ b/materializationengine/blueprints/materialize/api.py @@ -466,57 +466,57 @@ def post(self, datastack_name: str, version: int, table_name: str): "message": f"failed to activate service account using {activate_command}. Error: {stderr.decode()} stdout: {stdout.decode()}" }, 500 - # run a gcloud command to select * from table and write it to disk as a csv - export_command = [ + header_file = f"{datastack_name}/v{version}/{table_name}_header.csv" + header_cloudpath = os.path.join(bucket, header_file) + + header_command = [ "gcloud", "sql", "export", "csv", sql_instance_name, - cloudpath, + header_cloudpath, "--database", mat_db_name, - "--async", "--query", - f"SELECT * from {table_name}", + f"SELECT column_name, data_type from INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{table_name}'", ] - process = subprocess.Popen( - export_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE + header_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) stdout, stderr = process.communicate() # run this command and capture the stdout and return code return_code = process.returncode if return_code != 0: return { - "message": f"file failed to create using: {export_command}. Error: {stderr.decode()} stdout: {stdout.decode()}" + "message": f"header file failed to create using:\ + {header_command}. Error: {stderr.decode()} stdout: {stdout.decode()}" }, 500 - header_file = f"{datastack_name}/v{version}/{table_name}_header.csv" - header_cloudpath = os.path.join(bucket, header_file) - header_command = [ + # run a gcloud command to select * from table and write it to disk as a csv + export_command = [ "gcloud", "sql", "export", "csv", sql_instance_name, - header_cloudpath, + cloudpath, "--database", mat_db_name, "--async", "--query", - f"SELECT column_name, data_type from INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{table_name}'", + f"SELECT * from {table_name}", ] + process = subprocess.Popen( - header_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE + export_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) stdout, stderr = process.communicate() # run this command and capture the stdout and return code return_code = process.returncode if return_code != 0: return { - "message": f"header file failed to create using:\ - {header_command}. Error: {stderr.decode()} stdout: {stdout.decode()}" + "message": f"file failed to create using: {export_command}. Error: {stderr.decode()} stdout: {stdout.decode()}" }, 500 else: From de900984d3637233da74af57dff3c57684c58f1e Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 23 Aug 2024 14:25:37 -0700 Subject: [PATCH 18/21] add version 0 bypass --- materializationengine/blueprints/client/datastack.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/materializationengine/blueprints/client/datastack.py b/materializationengine/blueprints/client/datastack.py index 590f4481..73cbc0c8 100644 --- a/materializationengine/blueprints/client/datastack.py +++ b/materializationengine/blueprints/client/datastack.py @@ -37,6 +37,8 @@ def wrapper(*args, **kwargs): AnalysisVersion.datastack == target_datastack ) if target_version: + if target_version == 0: + return f(*args, **kwargs) if target_version == -1: return f(*args, **kwargs) version_query = version_query.filter( From f2bd476b54faa9e12616fca83e4364f81ed4ab5a Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 23 Aug 2024 16:26:19 -0700 Subject: [PATCH 19/21] adding unsigned int --- .../blueprints/client/api2.py | 32 ++++++++++++------- materializationengine/views.py | 4 +-- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/materializationengine/blueprints/client/api2.py b/materializationengine/blueprints/client/api2.py index 95a46b6b..b8282b41 100644 --- a/materializationengine/blueprints/client/api2.py +++ b/materializationengine/blueprints/client/api2.py @@ -574,7 +574,9 @@ def get(self, datastack_name: str): return versions, 200 -@client_bp.route("/datastack//version/") +@client_bp.route( + "/datastack//version/" +) class DatastackVersion(Resource): method_decorators = [ limit_by_category("fast_query"), @@ -611,7 +613,7 @@ def get(self, datastack_name: str, version: int): @client_bp.route( - "/datastack//version//table//count" + "/datastack//version//table//count" ) class FrozenTableCount(Resource): method_decorators = [ @@ -708,7 +710,9 @@ def get(self, datastack_name: str): return schema.dump(response, many=True), 200 -@client_bp.route("/datastack//version//tables") +@client_bp.route( + "/datastack//version//tables" +) class FrozenTableVersions(Resource): method_decorators = [ limit_by_category("fast_query"), @@ -753,7 +757,7 @@ def get(self, datastack_name: str, version: int): @client_bp.route( - "/datastack//version//tables/metadata" + "/datastack//version//tables/metadata" ) class FrozenTablesMetadata(Resource): method_decorators = [ @@ -812,7 +816,7 @@ def get( @client_bp.route( - "/datastack//version//table//metadata" + "/datastack//version//table//metadata" ) class FrozenTableMetadata(Resource): method_decorators = [ @@ -869,7 +873,7 @@ def get( @client_bp.expect(query_parser) @client_bp.route( - "/datastack//version//table//query" + "/datastack//version//table//query" ) class FrozenTableQuery(Resource): method_decorators = [ @@ -1145,7 +1149,7 @@ def preprocess_view_dataframe(df, view_name, db_name, column_names): @client_bp.expect(query_seg_prop_parser) @client_bp.route( - "/datastack//version//table//info" + "/datastack//version//table//info" ) class MatTableSegmentInfo(Resource): method_decorators = [ @@ -1331,7 +1335,9 @@ def get( @client_bp.expect(query_parser) -@client_bp.route("/datastack//version//query") +@client_bp.route( + "/datastack//version//query" +) class FrozenQuery(Resource): method_decorators = [ validate_datastack, @@ -1624,7 +1630,9 @@ def post(self, datastack_name: str): @client_bp.expect(query_parser) -@client_bp.route("/datastack//version//views") +@client_bp.route( + "/datastack//version//views" +) class AvailableViews(Resource): method_decorators = [ validate_datastack, @@ -1909,7 +1917,7 @@ def get( @client_bp.expect(query_parser) @client_bp.route( - "/datastack//version//views//query" + "/datastack//version//views//query" ) class ViewQuery(Resource): method_decorators = [ @@ -2029,7 +2037,7 @@ def get_table_schema(table): @client_bp.route( - "/datastack//version//views//schema" + "/datastack//version//views//schema" ) class ViewSchema(Resource): method_decorators = [ @@ -2075,7 +2083,7 @@ def get( @client_bp.route( - "/datastack//version//views/schemas" + "/datastack//version//views/schemas" ) class ViewSchemas(Resource): method_decorators = [ diff --git a/materializationengine/views.py b/materializationengine/views.py index 27ce5f0f..61adbe80 100644 --- a/materializationengine/views.py +++ b/materializationengine/views.py @@ -210,7 +210,7 @@ def datastack_view(datastack_name): ) -@views_bp.route("/datastack//version//failed") +@views_bp.route("/datastack//version//failed") @auth_requires_permission("view", table_arg="datastack_name") def version_error(datastack_name: str, id: int): aligned_volume_name, pcg_table_name = get_relevant_datastack_info(datastack_name) @@ -289,7 +289,7 @@ def make_seg_prop_ng_link(datastack_name, table_name, version, client, is_view=F return url_link -@views_bp.route("/datastack//version/") +@views_bp.route("/datastack//version/") @auth_requires_permission("view", table_arg="datastack_name") def version_view(datastack_name: str, id: int): aligned_volume_name, pcg_table_name = get_relevant_datastack_info(datastack_name) From a6fca86e545988068820695ad0ef84cce2e9392e Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 23 Aug 2024 16:49:44 -0700 Subject: [PATCH 20/21] fix metadata for views and int=0 --- materializationengine/blueprints/client/api2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/materializationengine/blueprints/client/api2.py b/materializationengine/blueprints/client/api2.py index b8282b41..c06f6794 100644 --- a/materializationengine/blueprints/client/api2.py +++ b/materializationengine/blueprints/client/api2.py @@ -1677,7 +1677,7 @@ def get( @client_bp.expect(query_parser) @client_bp.route( - "/datastack//version//views//metadata" + "/datastack//version//views//metadata" ) class ViewMetadata(Resource): method_decorators = [ From 01006a29cdc7b4b117e02bb7126fa092877c41fb Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Sat, 24 Aug 2024 16:46:14 -0700 Subject: [PATCH 21/21] make message returned more complex --- .../blueprints/materialize/api.py | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/materializationengine/blueprints/materialize/api.py b/materializationengine/blueprints/materialize/api.py index bd5c739d..a785560e 100644 --- a/materializationengine/blueprints/materialize/api.py +++ b/materializationengine/blueprints/materialize/api.py @@ -401,7 +401,12 @@ def post(self, datastack_name: str): response_model = mat_bp.model( - "Response", {"message": fields.String(description="Response message")} + "Response", + { + "message": fields.String(description="Response message"), + "csv_path": fields.String(description="Path to csv file", required=False), + "header_path": fields.String(description="Path to header file", required=False), + }, ) @@ -440,11 +445,17 @@ def post(self, datastack_name: str, version: int, table_name: str): filename = f"{datastack_name}/v{version}/{table_name}.csv.gz" cloudpath = os.path.join(bucket, filename) + header_file = f"{datastack_name}/v{version}/{table_name}_header.csv" + header_cloudpath = os.path.join(bucket, header_file) # check if the file already exists if cf.exists(filename): # return a flask respoonse 200 message that says that the file already exitss - return {"message": f"file already exists at {cloudpath}"}, 200 + return { + "message": "file already created", + "csv_path": cloudpath, + "header_path": header_cloudpath, + }, 200 else: # run a gcloud command to activate the service account for gcloud @@ -466,9 +477,6 @@ def post(self, datastack_name: str, version: int, table_name: str): "message": f"failed to activate service account using {activate_command}. Error: {stderr.decode()} stdout: {stdout.decode()}" }, 500 - header_file = f"{datastack_name}/v{version}/{table_name}_header.csv" - header_cloudpath = os.path.join(bucket, header_file) - header_command = [ "gcloud", "sql", @@ -521,7 +529,9 @@ def post(self, datastack_name: str, version: int, table_name: str): else: return { - "message": f"file created at {cloudpath}, header at {header_cloudpath}" + "message": "file created sucessefully", + "csv_path": cloudpath, + "header_path": header_cloudpath, }, 200