From 5d830ee76590c5c5206cb322ce242740c3420373 Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Wed, 1 Apr 2020 23:48:51 +0200 Subject: [PATCH 1/4] Python Wheel Install Command Allows installation of wheels onto databricks clusters by using standard Python setuptools framework. E.g. ``` python setup.py databricks_install --cluster-id abcd --databricks-cli-profile staging ``` will do the following: 1) build wheel 2) use `staging` profile from CLI or throw error with instructions to configure 3) upload it to DBFS location (configurable as well) 4) install it on cluster `abcd` as `whl` library TODO: 1) wait until library is successfully installed or throw error 2) install library on cluster by name 3) install library on clusters by tag (e.g. team tags) --- databricks_cli/libraries/distutils.py | 97 ++++++++++++++++++ databricks_cli/utils.py | 6 +- setup.py | 2 + tests/libraries/test_distutils.py | 135 ++++++++++++++++++++++++++ 4 files changed, 237 insertions(+), 3 deletions(-) create mode 100644 databricks_cli/libraries/distutils.py create mode 100644 tests/libraries/test_distutils.py diff --git a/databricks_cli/libraries/distutils.py b/databricks_cli/libraries/distutils.py new file mode 100644 index 00000000..addbbdd6 --- /dev/null +++ b/databricks_cli/libraries/distutils.py @@ -0,0 +1,97 @@ +# Databricks CLI +# Copyright 2020 Databricks, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"), except +# that the use of services to which certain application programming +# interfaces (each, an "API") connect requires that the user first obtain +# a license for the use of the APIs from Databricks, Inc. ("Databricks"), +# by creating an account at www.databricks.com and agreeing to either (a) +# the Community Edition Terms of Service, (b) the Databricks Terms of +# Service, or (c) another written agreement between Licensee and Databricks +# for the use of the APIs. +# +# You may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from setuptools import Command + +from databricks_cli.dbfs.api import DbfsApi +from databricks_cli.dbfs.dbfs_path import DbfsPath +from databricks_cli.configure.provider import get_config, ProfileConfigProvider +from databricks_cli.configure.config import _get_api_client +from databricks_cli.utils import InvalidConfigurationError + +import databricks_cli.sdk.service as service + + +class InstallLibraryCommand(Command): + user_options = [ + ('dbfs-path=', None, "Path of a library starting with dbfs://", + "default: dbfs:/FileStore/jars/{package_name}"), + ('cluster-id=', None, "cluster id to distribute it", None), + ('cluster-tag=', None, "cluster tag to install library", None), + ('cluster-name=', None, "cluster name to distribute it", None), + ('databricks-cli-profile=', None, "Databricks CLI profile name", None), + ] + + def initialize_options(self): + """Abstract method that is required to be overwritten""" + self.dbfs_path = None + self.cluster_id = None + self.cluster_name = None + self.cluster_tag = None + self.databricks_cli_profile = None + + def finalize_options(self): + """Abstract method that is required to be overwritten""" + if not self.dbfs_path: + package_name = self.distribution.get_name() + self.dbfs_path = f"dbfs:/FileStore/jars/{package_name}" + if not (self.cluster_id or self.cluster_name or self.cluster_tag): + raise RuntimeError('One of --cluster-id, --cluster-tag or --cluster-name should be provided') + + def _configure_api(self): + config = ProfileConfigProvider( + self.databricks_cli_profile + ).get_config() if 
self.databricks_cli_profile else get_config() + if not config or not config.is_valid: + raise InvalidConfigurationError.for_profile( + self.databricks_cli_profile, cli_tool='databricks') + return _get_api_client(config, "upload_library") + + def _upload_library(self, wheel_file): + from os.path import basename + dbfs = DbfsApi(self._configure_api()) + artifact = f'{self.dbfs_path}/{basename(wheel_file)}' + # TODO: iterate through previous versions & re-link to *-latest.wheel + dbfs.put_file(wheel_file, DbfsPath(artifact, validate=False), True) + return artifact + + def _install_library(self, artifact): + api_client = self._configure_api() + if self.cluster_tag: + raise RuntimeError('not yet supported') + if self.cluster_name: + raise RuntimeError('not yet supported') + cluster_id = self.cluster_id + libraries = service.ManagedLibraryService(api_client) + libraries.install_libraries(cluster_id, {'whl': artifact}) + # TODO: wait and check cluster status for library to be installed + + def run(self): + self.run_command('bdist_wheel') + if not self.distribution.dist_files: + raise RuntimeError('no dist files found') + for cmd, _, local_file in self.distribution.dist_files: + if not 'bdist_wheel' == cmd: + continue + artifact = self._upload_library(local_file) + self._install_library(artifact) diff --git a/databricks_cli/utils.py b/databricks_cli/utils.py index 03d89237..a3019a6a 100644 --- a/databricks_cli/utils.py +++ b/databricks_cli/utils.py @@ -89,13 +89,13 @@ def truncate_string(s, length=100): class InvalidConfigurationError(RuntimeError): @staticmethod - def for_profile(profile): + def for_profile(profile, cli_tool=sys.argv[0]): if profile is None: return InvalidConfigurationError( 'You haven\'t configured the CLI yet! ' - 'Please configure by entering `{} configure`'.format(sys.argv[0])) + 'Please configure by entering `{} configure`'.format(cli_tool)) return InvalidConfigurationError( ('You haven\'t configured the CLI yet for the profile {profile}! 
' 'Please configure by entering ' '`{argv} configure --profile {profile}`').format( - profile=profile, argv=sys.argv[0])) + profile=profile, argv=cli_tool)) diff --git a/setup.py b/setup.py index b85489e9..1163d784 100644 --- a/setup.py +++ b/setup.py @@ -44,6 +44,8 @@ [console_scripts] databricks=databricks_cli.cli:cli dbfs=databricks_cli.dbfs.cli:dbfs_group + [distutils.commands] + databricks_install=databricks_cli.libraries.distutils:InstallLibraryCommand ''', zip_safe=False, author='Andrew Chen', diff --git a/tests/libraries/test_distutils.py b/tests/libraries/test_distutils.py new file mode 100644 index 00000000..eafd6d9e --- /dev/null +++ b/tests/libraries/test_distutils.py @@ -0,0 +1,135 @@ +# Databricks CLI +# Copyright 2020 Databricks, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"), except +# that the use of services to which certain application programming +# interfaces (each, an "API") connect requires that the user first obtain +# a license for the use of the APIs from Databricks, Inc. ("Databricks"), +# by creating an account at www.databricks.com and agreeing to either (a) +# the Community Edition Terms of Service, (b) the Databricks Terms of +# Service, or (c) another written agreement between Licensee and Databricks +# for the use of the APIs. +# +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# pylint:disable=redefined-outer-name + +import mock +import pytest +import databricks_cli.libraries.distutils as st + +from databricks_cli.utils import InvalidConfigurationError +import setuptools.dist as dist +from databricks_cli.configure.provider import DatabricksConfig, DatabricksConfigProvider + + + +@pytest.fixture() +def libraries_api_mock(): + with mock.patch('databricks_cli.libraries.cli.LibrariesApi') as LibrariesApi: + _libraries_api_mock = mock.MagicMock() + LibrariesApi.return_value = _libraries_api_mock + yield _libraries_api_mock + + +def test_nothing(): + d = dist.Distribution() + ilc = st.InstallLibraryCommand(d) + ilc.initialize_options() + + assert ilc.cluster_id == None + assert ilc.cluster_name == None + assert ilc.cluster_tag == None + assert ilc.dbfs_path == None + + +def test_sets_name(): + d = dist.Distribution({'name': 'foo'}) + + ilc = st.InstallLibraryCommand(d) + ilc.initialize_options() + ilc.finalize_options() + + assert ilc.dbfs_path == 'dbfs:/FileStore/jars/foo' + + +def test_gets_api_client(): + class TestConfigProvider(DatabricksConfigProvider): + def get_config(self): + return DatabricksConfig.from_token("Override", "Token!") + provider = TestConfigProvider() + set_config_provider(provider) + + d = dist.Distribution({'name': 'foo'}) + + ilc = st.InstallLibraryCommand(d) + ilc.initialize_options() + ilc.finalize_options() + + api_client = ilc._configure_api() + assert None != api_client + + +def test_gets_api_client_with_profile(): + import time + d = dist.Distribution({'name': 'foo'}) + + ilc = st.InstallLibraryCommand(d) + ilc.initialize_options() + ilc.databricks_cli_profile = str(time.time()) + ilc.finalize_options() + + with pytest.raises(InvalidConfigurationError): + ilc._configure_api() + + +def test_upload_library(): + class TestConfigProvider(DatabricksConfigProvider): + def get_config(self): + return DatabricksConfig.from_token("Override", "Token!") + provider = TestConfigProvider() + set_config_provider(provider) + 
+ d = dist.Distribution({'name': 'foo'}) + + ilc = st.InstallLibraryCommand(d) + ilc.initialize_options() + ilc.finalize_options() + + with mock.patch('databricks_cli.dbfs.api.DbfsApi.put_file') as put_file: + result = ilc._upload_library('foo/foo-0.0.1.wheel') + assert result == 'dbfs:/FileStore/jars/foo/foo-0.0.1.wheel' + put_file.assert_called_once() + assert 'foo/foo-0.0.1.wheel' == put_file.call_args[0][0] + + +def test_run_stuff(): + d = dist.Distribution({'name': 'foo'}) + d.dist_files = [('bdist_wheel', None, 'foo/foo-0.0.1.wheel')] + + ilc = st.InstallLibraryCommand(d) + ilc.initialize_options() + ilc.finalize_options() + + def _upload_library(f): + assert f == 'foo/foo-0.0.1.wheel' + return 'realpath' + + def _install_library(f): + assert 'realpath' == f + + ilc._upload_library = _upload_library + ilc._install_library = _install_library + + with mock.patch('distutils.cmd.Command.run_command') as run_command: + ilc.run() + run_command.assert_called_once() \ No newline at end of file From d5174fddcf80c1e6d032d8f9ce68a2a41fdc9f83 Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Thu, 2 Apr 2020 00:48:07 +0200 Subject: [PATCH 2/4] Fix tests --- tests/libraries/test_distutils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/libraries/test_distutils.py b/tests/libraries/test_distutils.py index eafd6d9e..61340a52 100644 --- a/tests/libraries/test_distutils.py +++ b/tests/libraries/test_distutils.py @@ -29,8 +29,7 @@ from databricks_cli.utils import InvalidConfigurationError import setuptools.dist as dist -from databricks_cli.configure.provider import DatabricksConfig, DatabricksConfigProvider - +from databricks_cli.configure.provider import DatabricksConfig, DatabricksConfigProvider, set_config_provider @pytest.fixture() @@ -57,6 +56,7 @@ def test_sets_name(): ilc = st.InstallLibraryCommand(d) ilc.initialize_options() + ilc.cluster_id = 'abc' ilc.finalize_options() assert ilc.dbfs_path == 'dbfs:/FileStore/jars/foo' @@ -73,6 
+73,7 @@ def get_config(self): ilc = st.InstallLibraryCommand(d) ilc.initialize_options() + ilc.cluster_id = 'abc' ilc.finalize_options() api_client = ilc._configure_api() @@ -85,6 +86,7 @@ def test_gets_api_client_with_profile(): ilc = st.InstallLibraryCommand(d) ilc.initialize_options() + ilc.cluster_id = 'abc' ilc.databricks_cli_profile = str(time.time()) ilc.finalize_options() @@ -103,6 +105,7 @@ def get_config(self): ilc = st.InstallLibraryCommand(d) ilc.initialize_options() + ilc.cluster_id = 'abc' ilc.finalize_options() with mock.patch('databricks_cli.dbfs.api.DbfsApi.put_file') as put_file: @@ -118,6 +121,7 @@ def test_run_stuff(): ilc = st.InstallLibraryCommand(d) ilc.initialize_options() + ilc.cluster_id = 'abc' ilc.finalize_options() def _upload_library(f): From a1e7fe30a31e319da52ae7d432c1293b4f12f44f Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Thu, 2 Apr 2020 01:02:30 +0200 Subject: [PATCH 3/4] Fixed linters --- databricks_cli/libraries/distutils.py | 16 ++++++++++------ tests/libraries/test_distutils.py | 21 +++++++++++---------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/databricks_cli/libraries/distutils.py b/databricks_cli/libraries/distutils.py index addbbdd6..814b64e2 100644 --- a/databricks_cli/libraries/distutils.py +++ b/databricks_cli/libraries/distutils.py @@ -42,21 +42,25 @@ class InstallLibraryCommand(Command): ('databricks-cli-profile=', None, "Databricks CLI profile name", None), ] - def initialize_options(self): - """Abstract method that is required to be overwritten""" + def __init__(self, dist, **kw): + Command.__init__(self, dist, **kw) self.dbfs_path = None self.cluster_id = None self.cluster_name = None self.cluster_tag = None self.databricks_cli_profile = None + def initialize_options(self): + pass + def finalize_options(self): """Abstract method that is required to be overwritten""" if not self.dbfs_path: package_name = self.distribution.get_name() - self.dbfs_path = 
f"dbfs:/FileStore/jars/{package_name}" + self.dbfs_path = 'dbfs:/FileStore/jars/' + package_name if not (self.cluster_id or self.cluster_name or self.cluster_tag): - raise RuntimeError('One of --cluster-id, --cluster-tag or --cluster-name should be provided') + msg = 'One of --cluster-id, --cluster-tag or --cluster-name should be provided' + raise RuntimeError(msg) def _configure_api(self): config = ProfileConfigProvider( @@ -70,7 +74,7 @@ def _configure_api(self): def _upload_library(self, wheel_file): from os.path import basename dbfs = DbfsApi(self._configure_api()) - artifact = f'{self.dbfs_path}/{basename(wheel_file)}' + artifact = self.dbfs_path + '/' + basename(wheel_file) # TODO: iterate through previous versions & re-link to *-latest.wheel dbfs.put_file(wheel_file, DbfsPath(artifact, validate=False), True) return artifact @@ -91,7 +95,7 @@ def run(self): if not self.distribution.dist_files: raise RuntimeError('no dist files found') for cmd, _, local_file in self.distribution.dist_files: - if not 'bdist_wheel' == cmd: + if not cmd == 'bdist_wheel': continue artifact = self._upload_library(local_file) self._install_library(artifact) diff --git a/tests/libraries/test_distutils.py b/tests/libraries/test_distutils.py index 61340a52..67fd8911 100644 --- a/tests/libraries/test_distutils.py +++ b/tests/libraries/test_distutils.py @@ -25,11 +25,12 @@ import mock import pytest -import databricks_cli.libraries.distutils as st +import setuptools.dist as dist +import databricks_cli.libraries.distutils as st from databricks_cli.utils import InvalidConfigurationError -import setuptools.dist as dist -from databricks_cli.configure.provider import DatabricksConfig, DatabricksConfigProvider, set_config_provider +from databricks_cli.configure.provider import DatabricksConfig, \ + DatabricksConfigProvider, set_config_provider @pytest.fixture() @@ -45,10 +46,10 @@ def test_nothing(): ilc = st.InstallLibraryCommand(d) ilc.initialize_options() - assert ilc.cluster_id == None - 
assert ilc.cluster_name == None - assert ilc.cluster_tag == None - assert ilc.dbfs_path == None + assert ilc.cluster_id is None + assert ilc.cluster_name is None + assert ilc.cluster_tag is None + assert ilc.dbfs_path is None def test_sets_name(): @@ -77,7 +78,7 @@ def get_config(self): ilc.finalize_options() api_client = ilc._configure_api() - assert None != api_client + assert api_client is not None def test_gets_api_client_with_profile(): @@ -112,7 +113,7 @@ def get_config(self): result = ilc._upload_library('foo/foo-0.0.1.wheel') assert result == 'dbfs:/FileStore/jars/foo/foo-0.0.1.wheel' put_file.assert_called_once() - assert 'foo/foo-0.0.1.wheel' == put_file.call_args[0][0] + assert put_file.call_args[0][0] == 'foo/foo-0.0.1.wheel' def test_run_stuff(): @@ -129,7 +130,7 @@ def _upload_library(f): return 'realpath' def _install_library(f): - assert 'realpath' == f + assert f == 'realpath' ilc._upload_library = _upload_library ilc._install_library = _install_library From 4a857514d763cfcf8dcf4c93fdfc4efead5f26da Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Thu, 2 Apr 2020 10:27:11 +0200 Subject: [PATCH 4/4] Last line style fix --- tests/libraries/test_distutils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/libraries/test_distutils.py b/tests/libraries/test_distutils.py index 67fd8911..7df0f678 100644 --- a/tests/libraries/test_distutils.py +++ b/tests/libraries/test_distutils.py @@ -137,4 +137,4 @@ def _install_library(f): with mock.patch('distutils.cmd.Command.run_command') as run_command: ilc.run() - run_command.assert_called_once() \ No newline at end of file + run_command.assert_called_once()