Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add runs export command including model parsing #354

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions databricks_cli/runs/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,6 @@ def cancel_run(self, run_id):

def get_run_output(self, run_id):
    """Return whatever the underlying client reports as the output of run ``run_id``."""
    run_output = self.client.get_run_output(run_id)
    return run_output

def export_run(self, run_id, views_to_export=None):
    """
    Export run ``run_id`` through the underlying client.

    ``views_to_export`` is forwarded unchanged (``None`` when the caller omits it).
    """
    exported = self.client.export_run(run_id, views_to_export)
    return exported
51 changes: 49 additions & 2 deletions databricks_cli/runs/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,23 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import base64
import re
from json import loads as json_loads

try:
from urlparse import unquote_to_bytes
except ImportError:
from urllib.parse import unquote_to_bytes

import click
from tabulate import tabulate

from databricks_cli.click_types import OutputClickType, JsonClickType, RunIdClickType
from databricks_cli.utils import eat_exceptions, CONTEXT_SETTINGS, pretty_format, json_cli_base, \
truncate_string
from databricks_cli.configure.config import provide_api_client, profile_option, debug_option
from databricks_cli.runs.api import RunsApi
from databricks_cli.utils import eat_exceptions, CONTEXT_SETTINGS, pretty_format, json_cli_base, \
truncate_string
from databricks_cli.version import print_version_callback, version


Expand Down Expand Up @@ -135,6 +144,43 @@ def get_output_cli(api_client, run_id):
click.echo(pretty_format(RunsApi(api_client).get_run_output(run_id)))


@click.command(context_settings=CONTEXT_SETTINGS)
@click.option('--run-id', required=True, type=RunIdClickType())
@click.option('--views-to-export', required=False,
              type=click.Choice(['CODE', 'DASHBOARDS', 'ALL'], case_sensitive=False), default='ALL')
@click.option('--parse-model', is_flag=True, default=None,
              help='Parse the Notebook model JSON embedded in the HTML of each view and add '
                   'it as the "model" field.')
@debug_option
@profile_option
@eat_exceptions
@provide_api_client
def export_cli(api_client, run_id, views_to_export, parse_model):
    """
    Export and retrieve the job run task.

    The output schema is documented https://docs.databricks.com/api/latest/jobs.html#runs-export.
    """
    # NOTE: the docstring above is rendered by click as the command's --help text, so
    # implementation notes live in comments instead.
    raw_export = RunsApi(api_client).export_run(run_id, views_to_export)

    if parse_model:
        # Compiled once for all views. The capture stops at the first closing quote
        # (the base64 alphabet never contains "'"), so further "';" sequences on the
        # same line of the HTML are not swallowed into the payload.
        model_re = re.compile(r"__DATABRICKS_NOTEBOOK_MODEL\s*=\s*'([^']*)';")
        could_not_parse = "Could not parse model in view {}"

        for i, view in enumerate(raw_export.get('views', [])):
            match = model_re.search(view.get('content', ''))
            if match is None:
                click.echo(could_not_parse.format(i), err=True)
                continue
            try:
                # The payload is JSON that was URL encoded and then base64 encoded.
                model_urlencoded = base64.b64decode(match.group(1))
                # Decode to text explicitly: json.loads only accepts bytes on Python 3.6+.
                model_json = unquote_to_bytes(model_urlencoded).decode('utf-8')
                model_data = json_loads(model_json)
            except (TypeError, ValueError):
                # Bad base64, non UTF-8 bytes or invalid JSON: report this view on stderr
                # like a missing model and keep exporting the remaining views.
                click.echo(could_not_parse.format(i), err=True)
                continue
            # The parsed model is attached to the view in place.
            view['model'] = model_data

    click.echo(pretty_format(raw_export))


@click.command(context_settings=CONTEXT_SETTINGS)
@click.option('--run-id', required=True, type=RunIdClickType())
@debug_option
Expand Down Expand Up @@ -165,5 +211,6 @@ def runs_group(): # pragma: no cover
# Attach each runs command to runs_group under the name passed as ``name``.
runs_group.add_command(submit_cli, name='submit')
runs_group.add_command(list_cli, name='list')
runs_group.add_command(get_cli, name='get')
runs_group.add_command(export_cli, name='export')
runs_group.add_command(cancel_cli, name='cancel')
runs_group.add_command(get_output_cli, name='get-output')
49 changes: 49 additions & 0 deletions tests/runs/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,52 @@ def test_cancel_cli(runs_api_mock):
runner.invoke(cli.cancel_cli, ['--run-id', 1])
assert runs_api_mock.cancel_run.call_args[0][0] == 1
assert echo_mock.call_args[0][0] == pretty_format({})


# Canned export_run() response used by the export tests below: three views.
EXPORT_RETURN = {
    'views': [
        # A view with no content at all.
        {},
        # A view whose content carries no notebook model assignment.
        {'content': 'invalid'},
        # A view embedding {"foo":"bar"}, URL encoded and then base64 encoded.
        {
            'content': (
                "<script>var __DATABRICKS_NOTEBOOK_MODEL = 'JTdCJTIyZm9vJTIyJTNBJTIyYmFyJTIyJTdE';"
                "</script>"
            ),
        },
    ],
}


@provide_conf
def test_export_no_parse_model(runs_api_mock):
    """Without --parse-model the export response is echoed exactly as returned by the API."""
    runs_api_mock.export_run.return_value = EXPORT_RETURN
    cli_args = ['--run-id', 1]
    with mock.patch('databricks_cli.runs.cli.click.echo') as echo_mock:
        CliRunner().invoke(cli.export_cli, cli_args)
        requested_run_id = runs_api_mock.export_run.call_args[0][0]
        echoed = echo_mock.call_args[0][0]
        assert requested_run_id == 1
        assert echoed == pretty_format(EXPORT_RETURN)


@provide_conf
def test_export_parse_model(runs_api_mock):
    """With --parse-model, a view embedding a notebook model gains a parsed "model" field."""
    # Local import: the top of this test module is not guaranteed to import copy.
    import copy

    with mock.patch('databricks_cli.runs.cli.click.echo') as echo_mock:
        # export_cli attaches the parsed model to the view dicts in place, so give it a
        # deep copy; otherwise the shared EXPORT_RETURN fixture is mutated for other tests.
        runs_api_mock.export_run.return_value = copy.deepcopy(EXPORT_RETURN)
        runner = CliRunner()
        runner.invoke(cli.export_cli, ['--run-id', 1, '--parse-model'])
        assert runs_api_mock.export_run.call_args[0][0] == 1
        # Only the last echo call (the formatted export) is checked; the views without a
        # model are expected to come through unchanged.
        assert echo_mock.call_args[0][0] == pretty_format({
            'views': [
                {},
                {
                    'content': 'invalid'
                },
                {
                    'content': "<script>var __DATABRICKS_NOTEBOOK_MODEL = "
                               "'JTdCJTIyZm9vJTIyJTNBJTIyYmFyJTIyJTdE';</script>",
                    'model': {
                        'foo': 'bar'
                    }
                },
            ]
        })
4 changes: 2 additions & 2 deletions tox-requirements-3.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Test reqs
prospector[with_pyroma]==1.3.0
pylint==2.5.3
prospector[with_pyroma]==1.3.*
pylint==2.5.*
pep8-naming==0.5.0
pytest==3.8.1
mock==2.0.0
Expand Down
2 changes: 1 addition & 1 deletion tox-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Test reqs
prospector[with_pyroma]==0.12.7
pylint==1.8.2
pylint==1.9.5
pep8-naming==0.5.0
pytest==3.8.1
mock==2.0.0
Expand Down