diff --git a/databricks_cli/runs/api.py b/databricks_cli/runs/api.py
index d4e4843d..9ddec4cf 100644
--- a/databricks_cli/runs/api.py
+++ b/databricks_cli/runs/api.py
@@ -42,3 +42,6 @@ def cancel_run(self, run_id):
 
     def get_run_output(self, run_id):
         return self.client.get_run_output(run_id)
+
+    def export_run(self, run_id, views_to_export=None):
+        return self.client.export_run(run_id, views_to_export)
diff --git a/databricks_cli/runs/cli.py b/databricks_cli/runs/cli.py
index 5d234e7e..d2f24205 100644
--- a/databricks_cli/runs/cli.py
+++ b/databricks_cli/runs/cli.py
@@ -21,14 +21,25 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import base64
+import re
+from json import loads as json_loads
+
+try:
+    from urllib.parse import unquote_to_bytes
+except ImportError:
+    # Python 2: urlparse has no unquote_to_bytes; unquote() on a str already
+    # returns the raw byte string.
+    from urlparse import unquote as unquote_to_bytes
+
 import click
 from tabulate import tabulate
 
 from databricks_cli.click_types import OutputClickType, JsonClickType, RunIdClickType
-from databricks_cli.utils import eat_exceptions, CONTEXT_SETTINGS, pretty_format, json_cli_base, \
-    truncate_string
 from databricks_cli.configure.config import provide_api_client, profile_option, debug_option
 from databricks_cli.runs.api import RunsApi
+from databricks_cli.utils import eat_exceptions, CONTEXT_SETTINGS, pretty_format, json_cli_base, \
+    truncate_string
 from databricks_cli.version import print_version_callback, version
 
 
@@ -135,6 +146,49 @@ def get_output_cli(api_client, run_id):
     click.echo(pretty_format(RunsApi(api_client).get_run_output(run_id)))
 
 
+@click.command(context_settings=CONTEXT_SETTINGS)
+@click.option('--run-id', required=True, type=RunIdClickType())
+@click.option('--views-to-export', required=False,
+              type=click.Choice(['CODE', 'DASHBOARDS', 'ALL'], case_sensitive=False), default='ALL')
+@click.option('--parse-model', is_flag=True, default=None,
+              help='Parse the Notebook model JSON embedded in the HTML of each view and add '
+                   'it as the "model" field.')
+@debug_option
+@profile_option
+@eat_exceptions
+@provide_api_client
+def export_cli(api_client, run_id, views_to_export, parse_model):
+    """
+    Export and retrieve the job run task.
+
+    The output schema is documented at
+    https://docs.databricks.com/api/latest/jobs.html#runs-export.
+
+    With --parse-model, each view whose HTML content embeds a
+    __DATABRICKS_NOTEBOOK_MODEL assignment gets the decoded JSON stored under
+    its "model" key; views without one are reported on stderr and left as is.
+    """
+    raw_export = RunsApi(api_client).export_run(run_id, views_to_export)
+
+    if parse_model:
+        # Compiled once for all views. The payload is base64, which never
+        # contains a quote, so stop at the first closing quote instead of
+        # greedily running to the last "';" on the line.
+        model_re = re.compile(r"__DATABRICKS_NOTEBOOK_MODEL\s*=\s*'([^']*)';")
+        for (i, view) in enumerate(raw_export.get('views', [])):
+            match = model_re.search(view.get('content', ''))
+            if match is None:
+                click.echo("Could not parse model in view {}".format(i), err=True)
+                continue
+            # The model is JSON that was URL-encoded and then base64-encoded.
+            model_urlencoded = base64.b64decode(match.group(1))
+            # Decode to text: json.loads() only accepts bytes on Python 3.6+.
+            model_json = unquote_to_bytes(model_urlencoded).decode('utf-8')
+            view['model'] = json_loads(model_json)
+
+    click.echo(pretty_format(raw_export))
+
+
 @click.command(context_settings=CONTEXT_SETTINGS)
 @click.option('--run-id', required=True, type=RunIdClickType())
 @debug_option
@@ -165,5 +219,6 @@ def runs_group():  # pragma: no cover
 runs_group.add_command(submit_cli, name='submit')
 runs_group.add_command(list_cli, name='list')
 runs_group.add_command(get_cli, name='get')
+runs_group.add_command(export_cli, name='export')
 runs_group.add_command(cancel_cli, name='cancel')
 runs_group.add_command(get_output_cli, name='get-output')
diff --git a/tests/runs/test_cli.py b/tests/runs/test_cli.py
index 5ecc9a4e..55d7aaa7 100644
--- a/tests/runs/test_cli.py
+++ b/tests/runs/test_cli.py
@@ -105,3 +105,57 @@ def test_cancel_cli(runs_api_mock):
         runner.invoke(cli.cancel_cli, ['--run-id', 1])
         assert runs_api_mock.cancel_run.call_args[0][0] == 1
         assert echo_mock.call_args[0][0] == pretty_format({})
+
+
+# {"foo":"bar"} urlencoded and base64 encoded
+EXPORT_MODEL_CONTENT = "__DATABRICKS_NOTEBOOK_MODEL = 'JTdCJTIyZm9vJTIyJTNBJTIyYmFyJTIyJTdE';"
+
+EXPORT_RETURN = {
+    'views': [
+        {},
+        {
+            'content': 'invalid'
+        },
+        {
+            'content': EXPORT_MODEL_CONTENT
+        },
+    ]
+}
+
+
+def fresh_export_return():
+    # export_cli adds "model" to the view dicts in place; hand out copies per test.
+    return {'views': [dict(view) for view in EXPORT_RETURN['views']]}
+
+
+@provide_conf
+def test_export_no_parse_model(runs_api_mock):
+    with mock.patch('databricks_cli.runs.cli.click.echo') as echo_mock:
+        runs_api_mock.export_run.return_value = fresh_export_return()
+        runner = CliRunner()
+        runner.invoke(cli.export_cli, ['--run-id', 1])
+        assert runs_api_mock.export_run.call_args[0][0] == 1
+        assert echo_mock.call_args[0][0] == pretty_format(EXPORT_RETURN)
+
+
+@provide_conf
+def test_export_parse_model(runs_api_mock):
+    with mock.patch('databricks_cli.runs.cli.click.echo') as echo_mock:
+        runs_api_mock.export_run.return_value = fresh_export_return()
+        runner = CliRunner()
+        runner.invoke(cli.export_cli, ['--run-id', 1, '--parse-model'])
+        assert runs_api_mock.export_run.call_args[0][0] == 1
+        assert echo_mock.call_args[0][0] == pretty_format({
+            'views': [
+                {},
+                {
+                    'content': 'invalid'
+                },
+                {
+                    'content': EXPORT_MODEL_CONTENT,
+                    'model': {
+                        'foo': 'bar'
+                    }
+                },
+            ]
+        })
diff --git a/tox-requirements-3.txt b/tox-requirements-3.txt
index 0148019f..4f5cb82f 100644
--- a/tox-requirements-3.txt
+++ b/tox-requirements-3.txt
@@ -1,6 +1,6 @@
 # Test reqs
-prospector[with_pyroma]==1.3.0
-pylint==2.5.3
+prospector[with_pyroma]==1.3.*
+pylint==2.5.*
 pep8-naming==0.5.0
 pytest==3.8.1
 mock==2.0.0
diff --git a/tox-requirements.txt b/tox-requirements.txt
index 09396836..77b67718 100644
--- a/tox-requirements.txt
+++ b/tox-requirements.txt
@@ -1,6 +1,6 @@
 # Test reqs
 prospector[with_pyroma]==0.12.7
-pylint==1.8.2
+pylint==1.9.5
 pep8-naming==0.5.0
 pytest==3.8.1
 mock==2.0.0