Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Python Wheel Install Command #286

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 101 additions & 0 deletions databricks_cli/libraries/distutils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# Databricks CLI
# Copyright 2020 Databricks, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"), except
# that the use of services to which certain application programming
# interfaces (each, an "API") connect requires that the user first obtain
# a license for the use of the APIs from Databricks, Inc. ("Databricks"),
# by creating an account at www.databricks.com and agreeing to either (a)
# the Community Edition Terms of Service, (b) the Databricks Terms of
# Service, or (c) another written agreement between Licensee and Databricks
# for the use of the APIs.
#
# You may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from setuptools import Command

from databricks_cli.dbfs.api import DbfsApi
from databricks_cli.dbfs.dbfs_path import DbfsPath
from databricks_cli.configure.provider import get_config, ProfileConfigProvider
from databricks_cli.configure.config import _get_api_client
from databricks_cli.utils import InvalidConfigurationError

import databricks_cli.sdk.service as service


class InstallLibraryCommand(Command):
user_options = [
('dbfs-path=', None, "Path of a library starting with dbfs://",
"default: dbfs:/FileStore/jars/{package_name}"),
('cluster-id=', None, "cluster id to distribute it", None),
('cluster-tag=', None, "cluster tag to install library", None),
('cluster-name=', None, "cluster name to distribute it", None),
('databricks-cli-profile=', None, "Databricks CLI profile name", None),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should have an overwrite flag to be consistent with the databricks fs commands.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

to overwrite a wheel? makes perfect sense.

what do you think about the profile flag - maybe shorten it to --profile as well? so the command will be something like python setup.py databricks_install --cluster-id abcd --profile staging --overwrite

]

def __init__(self, dist, **kw):
    """Create the command for the distribution ``dist``.

    Option defaults are intentionally not assigned here. The base
    ``Command.__init__`` calls ``initialize_options()`` and setuptools then
    applies ``**kw`` as attributes; assigning ``None`` afterwards would
    silently discard any keyword overrides passed by the caller.

    :param dist: the distribution this command operates on.
    :param kw: attribute overrides forwarded to ``Command.__init__``.
    """
    Command.__init__(self, dist, **kw)

def initialize_options(self):
    """Reset every command-line option to its unset default (``None``).

    Required hook of the distutils/setuptools command protocol; real
    defaults that depend on other options are derived later in
    ``finalize_options``.
    """
    self.dbfs_path = None
    self.cluster_id = None
    self.cluster_name = None
    self.cluster_tag = None
    self.databricks_cli_profile = None

def finalize_options(self):
    """Derive the default DBFS target and verify a cluster was selected.

    When ``dbfs_path`` is unset it becomes ``dbfs:/FileStore/jars/<name>``,
    where ``<name>`` is the distribution name.

    :raises RuntimeError: if none of ``cluster_id``, ``cluster_name`` or
        ``cluster_tag`` is set.
    """
    if not self.dbfs_path:
        self.dbfs_path = 'dbfs:/FileStore/jars/' + self.distribution.get_name()

    has_target = bool(self.cluster_id or self.cluster_name or self.cluster_tag)
    if has_target:
        return
    raise RuntimeError(
        'One of --cluster-id, --cluster-tag or --cluster-name should be provided')

def _configure_api(self):
    """Return an API client built from the selected CLI configuration.

    The configuration comes from ``ProfileConfigProvider`` when a profile
    name was given, otherwise from ``get_config()``.

    :raises InvalidConfigurationError: if no configuration was found or it
        reports itself as invalid.
    """
    profile = self.databricks_cli_profile
    if profile:
        config = ProfileConfigProvider(profile).get_config()
    else:
        config = get_config()

    if config and config.is_valid:
        return _get_api_client(config, "upload_library")
    raise InvalidConfigurationError.for_profile(
        self.databricks_cli_profile, cli_tool='databricks')

def _upload_library(self, wheel_file):
    """Upload a locally built wheel to DBFS and return where it was stored.

    :param wheel_file: local path of the wheel; only its base name is used
        for the remote file name.
    :return: the DBFS path (string) the wheel was written to, i.e.
        ``<dbfs_path>/<basename of wheel_file>``.
    """
    from os.path import basename
    dbfs = DbfsApi(self._configure_api())
    # Drop trailing slashes so that ``--dbfs-path=dbfs:/dir/`` does not
    # yield ``dbfs:/dir//pkg.whl``.
    target_dir = self.dbfs_path.rstrip('/')
    artifact = target_dir + '/' + basename(wheel_file)
    # TODO: iterate through previous versions & re-link to *-latest.wheel
    # NOTE(review): the trailing ``True`` is presumably put_file's overwrite
    # flag -- confirm against DbfsApi.put_file.
    dbfs.put_file(wheel_file, DbfsPath(artifact, validate=False), True)
    return artifact

def _install_library(self, artifact):
api_client = self._configure_api()
if self.cluster_tag:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we remove the options if they don't work?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i'm about to add logic in next commit

raise RuntimeError('not yet supported')
if self.cluster_name:
raise RuntimeError('not yet supported')
cluster_id = self.cluster_id
libraries = service.ManagedLibraryService(api_client)
libraries.install_libraries(cluster_id, {'whl': artifact})
# TODO: wait and check cluster status for library to be installed

def run(self):
    """Build a wheel, then upload and install each wheel that was produced.

    Runs the ``bdist_wheel`` command, then walks the distribution's
    ``dist_files`` and, for every entry created by ``bdist_wheel``, uploads
    the file and installs the resulting artifact. Entries from other
    commands are ignored.

    :raises RuntimeError: if ``dist_files`` is empty after the build.
    """
    self.run_command('bdist_wheel')
    if not self.distribution.dist_files:
        raise RuntimeError('no dist files found')

    # Lazy filter: each wheel is uploaded and installed before the next
    # entry is examined.
    wheel_paths = (
        path
        for command, _, path in self.distribution.dist_files
        if command == 'bdist_wheel'
    )
    for wheel_path in wheel_paths:
        self._install_library(self._upload_library(wheel_path))
6 changes: 3 additions & 3 deletions databricks_cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,13 +89,13 @@ def truncate_string(s, length=100):

class InvalidConfigurationError(RuntimeError):
@staticmethod
def for_profile(profile):
def for_profile(profile, cli_tool=sys.argv[0]):
if profile is None:
return InvalidConfigurationError(
'You haven\'t configured the CLI yet! '
'Please configure by entering `{} configure`'.format(sys.argv[0]))
'Please configure by entering `{} configure`'.format(cli_tool))
return InvalidConfigurationError(
('You haven\'t configured the CLI yet for the profile {profile}! '
'Please configure by entering '
'`{argv} configure --profile {profile}`').format(
profile=profile, argv=sys.argv[0]))
profile=profile, argv=cli_tool))
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@
[console_scripts]
databricks=databricks_cli.cli:cli
dbfs=databricks_cli.dbfs.cli:dbfs_group
[distutils.commands]
databricks_install=databricks_cli.libraries.distutils:InstallLibraryCommand
''',
zip_safe=False,
author='Andrew Chen',
Expand Down
140 changes: 140 additions & 0 deletions tests/libraries/test_distutils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
# Databricks CLI
# Copyright 2020 Databricks, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"), except
# that the use of services to which certain application programming
# interfaces (each, an "API") connect requires that the user first obtain
# a license for the use of the APIs from Databricks, Inc. ("Databricks"),
# by creating an account at www.databricks.com and agreeing to either (a)
# the Community Edition Terms of Service, (b) the Databricks Terms of
# Service, or (c) another written agreement between Licensee and Databricks
# for the use of the APIs.
#
# You may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# pylint:disable=redefined-outer-name

import mock
import pytest
import setuptools.dist as dist

import databricks_cli.libraries.distutils as st
from databricks_cli.utils import InvalidConfigurationError
from databricks_cli.configure.provider import DatabricksConfig, \
DatabricksConfigProvider, set_config_provider


@pytest.fixture()
def libraries_api_mock():
    """Patch ``LibrariesApi`` in the libraries CLI module and yield the mock
    instance its constructor returns for the duration of the test."""
    patcher = mock.patch('databricks_cli.libraries.cli.LibrariesApi')
    with patcher as api_class:
        api_instance = mock.MagicMock()
        api_class.return_value = api_instance
        yield api_instance


def test_nothing():
    """A freshly created command has every checked option unset."""
    command = st.InstallLibraryCommand(dist.Distribution())
    command.initialize_options()

    for option in ('cluster_id', 'cluster_name', 'cluster_tag', 'dbfs_path'):
        assert getattr(command, option) is None


def test_sets_name():
    """finalize_options derives the default DBFS path from the package name."""
    distribution = dist.Distribution({'name': 'foo'})
    command = st.InstallLibraryCommand(distribution)
    command.initialize_options()
    command.cluster_id = 'abc'

    command.finalize_options()

    expected_path = 'dbfs:/FileStore/jars/foo'
    assert command.dbfs_path == expected_path


def test_gets_api_client():
    """After registering a token-based config provider, _configure_api
    returns a non-None client."""
    class TokenConfigProvider(DatabricksConfigProvider):
        def get_config(self):
            return DatabricksConfig.from_token("Override", "Token!")

    set_config_provider(TokenConfigProvider())

    command = st.InstallLibraryCommand(dist.Distribution({'name': 'foo'}))
    command.initialize_options()
    command.cluster_id = 'abc'
    command.finalize_options()

    assert command._configure_api() is not None


def test_gets_api_client_with_profile():
    """_configure_api raises InvalidConfigurationError for the given profile."""
    import time

    command = st.InstallLibraryCommand(dist.Distribution({'name': 'foo'}))
    command.initialize_options()
    command.cluster_id = 'abc'
    # The current timestamp is used as the profile name -- presumably so it
    # never matches a profile that exists in the local configuration.
    command.databricks_cli_profile = str(time.time())
    command.finalize_options()

    with pytest.raises(InvalidConfigurationError):
        command._configure_api()


def test_upload_library():
    """_upload_library returns the DBFS artifact path and calls put_file once
    with the local wheel path as first argument."""
    class TokenConfigProvider(DatabricksConfigProvider):
        def get_config(self):
            return DatabricksConfig.from_token("Override", "Token!")

    set_config_provider(TokenConfigProvider())

    command = st.InstallLibraryCommand(dist.Distribution({'name': 'foo'}))
    command.initialize_options()
    command.cluster_id = 'abc'
    command.finalize_options()

    local_wheel = 'foo/foo-0.0.1.wheel'
    with mock.patch('databricks_cli.dbfs.api.DbfsApi.put_file') as put_file:
        uploaded_to = command._upload_library(local_wheel)
        assert uploaded_to == 'dbfs:/FileStore/jars/foo/foo-0.0.1.wheel'
        put_file.assert_called_once()
        first_positional = put_file.call_args[0][0]
        assert first_positional == local_wheel


def test_run_stuff():
    """run() passes the built wheel to _upload_library, hands its result to
    _install_library, and invokes run_command exactly once."""
    wheel = 'foo/foo-0.0.1.wheel'
    distribution = dist.Distribution({'name': 'foo'})
    distribution.dist_files = [('bdist_wheel', None, wheel)]

    command = st.InstallLibraryCommand(distribution)
    command.initialize_options()
    command.cluster_id = 'abc'
    command.finalize_options()

    def fake_upload(path):
        assert path == wheel
        return 'realpath'

    def fake_install(artifact):
        assert artifact == 'realpath'

    # Replace the network-facing steps with local stand-ins.
    command._upload_library = fake_upload
    command._install_library = fake_install

    with mock.patch('distutils.cmd.Command.run_command') as run_command:
        command.run()
        run_command.assert_called_once()