From da9f63009e2def5ef5c04eb1e1a360119070271f Mon Sep 17 00:00:00 2001 From: Marek Brysa Date: Thu, 3 Dec 2020 11:55:23 +0100 Subject: [PATCH 1/7] Add runs export command including model parsing --- databricks_cli/runs/api.py | 3 +++ databricks_cli/runs/cli.py | 41 +++++++++++++++++++++++++++++++++ tests/runs/test_cli.py | 46 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+) diff --git a/databricks_cli/runs/api.py b/databricks_cli/runs/api.py index d4e4843d..9ddec4cf 100644 --- a/databricks_cli/runs/api.py +++ b/databricks_cli/runs/api.py @@ -42,3 +42,6 @@ def cancel_run(self, run_id): def get_run_output(self, run_id): return self.client.get_run_output(run_id) + + def export_run(self, run_id, views_to_export=None): + return self.client.export_run(run_id, views_to_export) diff --git a/databricks_cli/runs/cli.py b/databricks_cli/runs/cli.py index 5d234e7e..f038a7cc 100644 --- a/databricks_cli/runs/cli.py +++ b/databricks_cli/runs/cli.py @@ -21,8 +21,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import base64 import click +import json +import re from tabulate import tabulate +from six.moves.urllib.parse import unquote_to_bytes from databricks_cli.click_types import OutputClickType, JsonClickType, RunIdClickType from databricks_cli.utils import eat_exceptions, CONTEXT_SETTINGS, pretty_format, json_cli_base, \ @@ -135,6 +139,42 @@ def get_output_cli(api_client, run_id): click.echo(pretty_format(RunsApi(api_client).get_run_output(run_id))) +@click.command(context_settings=CONTEXT_SETTINGS) +@click.option('--run-id', required=True, type=RunIdClickType()) +@click.option('--views-to-export', required=False, + type=click.Choice(['CODE', 'DASHBOARDS', 'ALL'], case_sensitive=False), default='ALL') +@click.option('--parse-model', is_flag=True, default=None, + help='Parse the Notebook model JSON embedded in the HTML of each view and add it as the "model" field.') +@debug_option +@profile_option +@eat_exceptions +@provide_api_client +def export_cli(api_client, run_id, views_to_export, parse_model): + """ + Export and retrieve the job run task. + + The output schema is documented https://docs.databricks.com/api/latest/jobs.html#runs-export. + """ + raw_export = RunsApi(api_client).export_run(run_id, views_to_export) + + if parse_model: + views = raw_export.get('views', []) + for (i, view) in enumerate(views): + content = view.get('content', '') + model_re = re.compile("__DATABRICKS_NOTEBOOK_MODEL\\s*=\\s*'(.*)';") + match = model_re.search(content) + if match is None: + click.echo("Could not parse model in view {}".format(i), err=True) + continue + model_base64 = match.group(1) + model_urlencoded = base64.b64decode(model_base64) + model_json = unquote_to_bytes(model_urlencoded) + model_data = json.loads(model_json) + view['model'] = model_data + + click.echo(pretty_format(raw_export)) + + @click.command(context_settings=CONTEXT_SETTINGS) @click.option('--run-id', required=True, type=RunIdClickType()) @debug_option @@ -165,5 +205,6 @@ def runs_group(): # pragma: no cover runs_group.add_command(submit_cli, name='submit') runs_group.add_command(list_cli, name='list') runs_group.add_command(get_cli, name='get') +runs_group.add_command(export_cli, name='export') runs_group.add_command(cancel_cli, name='cancel') runs_group.add_command(get_output_cli, name='get-output') diff --git a/tests/runs/test_cli.py b/tests/runs/test_cli.py index 5ecc9a4e..43ded030 100644 --- a/tests/runs/test_cli.py +++ b/tests/runs/test_cli.py @@ -105,3 +105,49 @@ def test_cancel_cli(runs_api_mock): runner.invoke(cli.cancel_cli, ['--run-id', 1]) assert runs_api_mock.cancel_run.call_args[0][0] == 1 assert echo_mock.call_args[0][0] == pretty_format({}) + + +EXPORT_RETURN = { + 'views': [ + {}, + { + 'content': 'invalid' + }, + { + # {"foo":"bar"} urlencoded and base64 encoded + 'content': "" + }, + ] +} + + +@provide_conf +def test_export_no_parse_model(runs_api_mock): + with mock.patch('databricks_cli.runs.cli.click.echo') as echo_mock: + runs_api_mock.export_run.return_value = EXPORT_RETURN + runner = CliRunner() + runner.invoke(cli.export_cli, ['--run-id', 1]) + assert runs_api_mock.export_run.call_args[0][0] == 1 + assert echo_mock.call_args[0][0] == pretty_format(EXPORT_RETURN) + +@provide_conf +def test_export_parse_model(runs_api_mock): + with mock.patch('databricks_cli.runs.cli.click.echo') as echo_mock: + runs_api_mock.export_run.return_value = EXPORT_RETURN + runner = CliRunner() + runner.invoke(cli.export_cli, ['--run-id', 1, '--parse-model']) + assert runs_api_mock.export_run.call_args[0][0] == 1 + assert echo_mock.call_args[0][0] == pretty_format({ + 'views': [ + {}, + { + 'content': 'invalid' + }, + { + 'content': "", + 'model': { + 'foo': 'bar' + } + }, + ] + }) From 858fc94cf2f0f520fd668993273a6082846f6347 Mon Sep 17 00:00:00 2001 From: Marek Brysa Date: Thu, 3 Dec 2020 13:39:12 +0100 Subject: [PATCH 2/7] fix tox-requirements-3.txt conflict --- tox-requirements-3.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tox-requirements-3.txt b/tox-requirements-3.txt index 0148019f..4f5cb82f 100644 --- a/tox-requirements-3.txt +++ b/tox-requirements-3.txt @@ -1,6 +1,6 @@ # Test reqs -prospector[with_pyroma]==1.3.0 -pylint==2.5.3 +prospector[with_pyroma]==1.3.* +pylint==2.5.* pep8-naming==0.5.0 pytest==3.8.1 mock==2.0.0 From e6cf90781c8880b5820d3e80aeb6c53e1e7c5af4 Mon Sep 17 00:00:00 2001 From: Marek Brysa Date: Thu, 3 Dec 2020 16:26:08 +0100 Subject: [PATCH 3/7] lint --- databricks_cli/runs/cli.py | 16 +++++++++------- tests/runs/test_cli.py | 7 +++++-- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/databricks_cli/runs/cli.py b/databricks_cli/runs/cli.py index f038a7cc..7e39e8d3 100644 --- a/databricks_cli/runs/cli.py +++ b/databricks_cli/runs/cli.py @@ -22,17 +22,18 @@ # limitations under the License. import base64 -import click -import json import re -from tabulate import tabulate +from json import loads as json_loads + +import click from six.moves.urllib.parse import unquote_to_bytes +from tabulate import tabulate from databricks_cli.click_types import OutputClickType, JsonClickType, RunIdClickType -from databricks_cli.utils import eat_exceptions, CONTEXT_SETTINGS, pretty_format, json_cli_base, \ - truncate_string from databricks_cli.configure.config import provide_api_client, profile_option, debug_option from databricks_cli.runs.api import RunsApi +from databricks_cli.utils import eat_exceptions, CONTEXT_SETTINGS, pretty_format, json_cli_base, \ + truncate_string from databricks_cli.version import print_version_callback, version @@ -144,7 +145,8 @@ def get_output_cli(api_client, run_id): @click.option('--views-to-export', required=False, type=click.Choice(['CODE', 'DASHBOARDS', 'ALL'], case_sensitive=False), default='ALL') @click.option('--parse-model', is_flag=True, default=None, - help='Parse the Notebook model JSON embedded in the HTML of each view and add it as the "model" field.') + help='Parse the Notebook model JSON embedded in the HTML of each view and add ' + 'it as the "model" field.') @debug_option @profile_option @eat_exceptions @@ -169,7 +171,7 @@ def export_cli(api_client, run_id, views_to_export, parse_model): model_base64 = match.group(1) model_urlencoded = base64.b64decode(model_base64) model_json = unquote_to_bytes(model_urlencoded) - model_data = json.loads(model_json) + model_data = json_loads(model_json) view['model'] = model_data click.echo(pretty_format(raw_export)) diff --git a/tests/runs/test_cli.py b/tests/runs/test_cli.py index 43ded030..55d7aaa7 100644 --- a/tests/runs/test_cli.py +++ b/tests/runs/test_cli.py @@ -115,7 +115,8 @@ def test_cancel_cli(runs_api_mock): }, { # {"foo":"bar"} urlencoded and base64 encoded - 'content': "" + 'content': "" }, ] } @@ -130,6 +131,7 @@ def test_export_no_parse_model(runs_api_mock): assert runs_api_mock.export_run.call_args[0][0] == 1 assert echo_mock.call_args[0][0] == pretty_format(EXPORT_RETURN) + @provide_conf def test_export_parse_model(runs_api_mock): with mock.patch('databricks_cli.runs.cli.click.echo') as echo_mock: @@ -144,7 +146,8 @@ def test_export_parse_model(runs_api_mock): 'content': 'invalid' }, { - 'content': "", + 'content': "", 'model': { 'foo': 'bar' } From 97bba88a9c101ca6e09af52d8aacd20fbaa36feb Mon Sep 17 00:00:00 2001 From: Marek Brysa Date: Fri, 4 Dec 2020 08:45:02 +0100 Subject: [PATCH 4/7] bump pylint for Python 2 --- tox-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox-requirements.txt b/tox-requirements.txt index 09396836..77b67718 100644 --- a/tox-requirements.txt +++ b/tox-requirements.txt @@ -1,6 +1,6 @@ # Test reqs prospector[with_pyroma]==0.12.7 -pylint==1.8.2 +pylint==1.9.5 pep8-naming==0.5.0 pytest==3.8.1 mock==2.0.0 From 20f9782ae2e0d5bbd4e3ff071f0328bae16604f1 Mon Sep 17 00:00:00 2001 From: Marek Brysa Date: Fri, 4 Dec 2020 11:00:48 +0100 Subject: [PATCH 5/7] disable broken lint --- databricks_cli/runs/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/databricks_cli/runs/cli.py b/databricks_cli/runs/cli.py index 7e39e8d3..e1e2aa1b 100644 --- a/databricks_cli/runs/cli.py +++ b/databricks_cli/runs/cli.py @@ -26,7 +26,7 @@ from json import loads as json_loads import click -from six.moves.urllib.parse import unquote_to_bytes +from six.moves.urllib.parse import unquote_to_bytes # pylint: disable=W0403 from tabulate import tabulate from databricks_cli.click_types import OutputClickType, JsonClickType, RunIdClickType From 47ab4303328b852ac3ff3c830e95cfbc6555d3e8 Mon Sep 17 00:00:00 2001 From: Marek Brysa Date: Fri, 4 Dec 2020 11:57:10 +0100 Subject: [PATCH 6/7] disable broken lint attempt 2 --- databricks_cli/runs/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/databricks_cli/runs/cli.py b/databricks_cli/runs/cli.py index e1e2aa1b..4e9ee5ea 100644 --- a/databricks_cli/runs/cli.py +++ b/databricks_cli/runs/cli.py @@ -26,7 +26,7 @@ from json import loads as json_loads import click -from six.moves.urllib.parse import unquote_to_bytes # pylint: disable=W0403 +from six.moves.urllib.parse import unquote_to_bytes # pylint: disable=relative-import from tabulate import tabulate from databricks_cli.click_types import OutputClickType, JsonClickType, RunIdClickType From 7de5399a0700adc3577cdac3bf41576d3aae1470 Mon Sep 17 00:00:00 2001 From: Marek Brysa Date: Fri, 4 Dec 2020 13:17:09 +0100 Subject: [PATCH 7/7] disable broken lint attempt 3 --- databricks_cli/runs/cli.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/databricks_cli/runs/cli.py b/databricks_cli/runs/cli.py index 4e9ee5ea..d2f24205 100644 --- a/databricks_cli/runs/cli.py +++ b/databricks_cli/runs/cli.py @@ -25,8 +25,12 @@ import re from json import loads as json_loads +try: + from urlparse import unquote_to_bytes +except ImportError: + from urllib.parse import unquote_to_bytes + import click -from six.moves.urllib.parse import unquote_to_bytes # pylint: disable=relative-import from tabulate import tabulate from databricks_cli.click_types import OutputClickType, JsonClickType, RunIdClickType