making env vars equivalent to whylogs'

whylabs · Dec 3, 2024 · ec88e91 · ec88e91
1 parent 4c697c8
commit ec88e91
Show file tree

Hide file tree

Showing 15 changed files with 93 additions and 61 deletions.
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -3,7 +3,7 @@
 import pytest
 
 from whylabs_toolkit.monitor.manager import MonitorSetup
-from whylabs_toolkit.monitor.models import *
+from whylabs_toolkit.monitor.models import DiffConfig, DiffMode, SimpleColumnMetric, TrailingWindowBaseline
 from whylabs_toolkit.helpers.config import UserConfig
 
 
@@ -21,7 +21,7 @@ def monitor_setup() -> MonitorSetup:
 @pytest.fixture
 def existing_monitor_setup() -> MonitorSetup:
     monitor_setup = MonitorSetup(
-        monitor_id=os.environ["MONITOR_ID"]
+        monitor_id=os.environ["WHYLABS_DEFAULT_MONITOR_ID"]
     )
     return monitor_setup
 
@@ -31,6 +31,6 @@ def user_config() -> UserConfig:
         api_key=os.environ["DEV_WHYLABS_API_KEY"],
         org_id=os.environ["DEV_ORG_ID"],
         dataset_id=os.environ["DEV_DATASET_ID"],
-        whylabs_host="https://songbird.development.whylabsdev.com"
+        whylabs_api_endpoint="https://songbird.development.whylabsdev.com"
     )
     return config
diff --git a/tests/helpers/test_config.py b/tests/helpers/test_config.py
@@ -7,7 +7,7 @@
 def test_setup_with_private_endpoint():
     os.environ["WHYLABS_PRIVATE_API_ENDPOINT"] = "http://private.com"
 
-    api_endpoint = Config().get_whylabs_host()
+    api_endpoint = Config().get_whylabs_api_endpoint()
 
     assert api_endpoint == "http://private.com"
 

diff --git a/tests/helpers/test_dataset_profiles.py b/tests/helpers/test_dataset_profiles.py
@@ -10,6 +10,10 @@
     process_date_input
 )
 
+ORG_ID = os.environ["WHYLABS_DEFAULT_ORG_ID"]
+DATASET_ID = os.environ["WHYLABS_DEFAULT_DATASET_ID"]
+
+
 def test_validate_timestamp_in_millis() -> None:
     assert validate_timestamp_in_millis(1627233600000) == True
     assert validate_timestamp_in_millis(-1231214) == False
@@ -42,38 +46,38 @@ def test_delete_profile_for_datetime_range():
     result = delete_all_profiles_for_period(
         start=datetime(2023,7,5), 
         end=datetime(2023,7,6), 
-        dataset_id = os.environ["DATASET_ID"], 
-        org_id=os.environ["ORG_ID"]
+        dataset_id = DATASET_ID,
+        org_id=ORG_ID
     )
 
-    assert result.get("id") == f"{os.environ['ORG_ID']}/{os.environ['DATASET_ID']}"
+    assert result.get("id") == f"{ORG_ID}/{DATASET_ID}"
 
 
 def test_delete_profiles_for_milliseconds_range():
     result = delete_all_profiles_for_period(
         start=int(datetime(2023,7,5).timestamp()*1000.0), 
         end=int(datetime(2023,7,6).timestamp()*1000.0), 
-        dataset_id = os.environ["DATASET_ID"], 
-        org_id=os.environ["ORG_ID"]
+        dataset_id = DATASET_ID,
+        org_id= ORG_ID
     )
 
-    assert result.get("id") == f"{os.environ['ORG_ID']}/{os.environ['DATASET_ID']}"
+    assert result.get("id") == f"{ORG_ID}/{DATASET_ID}"
 
 
 def test_delete_profiles_raises_if_other_format_is_passed():
     with pytest.raises(ValueError):
         delete_all_profiles_for_period(
             start=-123123123123, 
             end=int(datetime(2023,7,6).timestamp()*1000.0), 
-            dataset_id = os.environ["DATASET_ID"], 
-            org_id=os.environ["ORG_ID"]
+            dataset_id = DATASET_ID,
+            org_id= ORG_ID
         )
     with pytest.raises(ValueError):
         delete_all_profiles_for_period(
             start="string_example", 
             end=int(datetime(2023,7,6).timestamp()*1000.0), 
-            dataset_id = os.environ["DATASET_ID"], 
-            org_id=os.environ["ORG_ID"]
+            dataset_id = DATASET_ID,
+            org_id = ORG_ID
         )
 
 @patch('whylabs_toolkit.helpers.dataset_profiles.get_dataset_profile_api')
@@ -86,22 +90,22 @@ def test_delete_profiles_calls_delete_analyzer_results(mock_get_api):
 
 
     delete_all_profiles_for_period(
-        start=int(datetime(2023,7,5).timestamp()*1000.0), 
-        end=int(datetime(2023,7,6).timestamp()*1000.0), 
-        dataset_id = os.environ["DATASET_ID"], 
-        org_id=os.environ["ORG_ID"]
+        start = int(datetime(2023,7,5).timestamp()*1000.0),
+        end = int(datetime(2023,7,6).timestamp()*1000.0),
+        dataset_id = DATASET_ID,
+        org_id = ORG_ID
     )
 
     mock_call.delete_dataset_profiles.assert_called_with(
-        org_id=os.environ["ORG_ID"], 
-        dataset_id=os.environ["DATASET_ID"], 
+        org_id= ORG_ID,
+        dataset_id= DATASET_ID,
         profile_start_timestamp=int(datetime(2023,7,5).timestamp()*1000.0), 
         profile_end_timestamp=int(datetime(2023,7,6).timestamp()*1000.0)
     )
 
     mock_call.delete_analyzer_results.assert_called_with(
-        org_id=os.environ["ORG_ID"], 
-        dataset_id=os.environ["DATASET_ID"], 
+        org_id = ORG_ID,
+        dataset_id = DATASET_ID,
         start_timestamp=int(datetime(2023,7,5).timestamp()*1000.0), 
         end_timestamp=int(datetime(2023,7,6).timestamp()*1000.0)
     )
diff --git a/tests/helpers/test_entity_schema.py b/tests/helpers/test_entity_schema.py
@@ -11,8 +11,8 @@
 )
 from whylabs_toolkit.monitor.models.column_schema import ColumnDataType
 
-ORG_ID = os.environ["ORG_ID"]
-DATASET_ID = os.environ["DATASET_ID"]
+ORG_ID = os.environ["WHYLABS_DEFAULT_ORG_ID"]
+DATASET_ID = os.environ["WHYLABS_DEFAULT_DATASET_ID"]
 
 
 def test_change_columns_input_output() -> None:

diff --git a/tests/helpers/test_model.py b/tests/helpers/test_model.py
@@ -7,8 +7,8 @@
 from whylabs_toolkit.helpers.utils import get_models_api
 from whylabs_toolkit.helpers.config import Config
 
-ORG_ID = os.environ["ORG_ID"]
-DATASET_ID = os.environ["DATASET_ID"]
+ORG_ID = os.environ["WHYLABS_DEFAULT_ORG_ID"]
+DATASET_ID = os.environ["WHYLABS_DEFAULT_DATASET_ID"]
 
 
 @pytest.fixture

diff --git a/tests/helpers/test_monitor_helpers.py b/tests/helpers/test_monitor_helpers.py
@@ -12,10 +12,10 @@
 from whylabs_toolkit.utils.granularity import Granularity
 
 
-ORG_ID = os.environ["ORG_ID"]
-DATASET_ID = os.environ["DATASET_ID"]
-MONITOR_ID = os.environ["MONITOR_ID"]
-ANALYZER_ID = os.environ["ANALYZER_ID"]
+ORG_ID = os.environ["WHYLABS_DEFAULT_ORG_ID"]
+DATASET_ID = os.environ["WHYLABS_DEFAULT_DATASET_ID"]
+MONITOR_ID = os.environ["WHYLABS_DEFAULT_MONITOR_ID"]
+ANALYZER_ID = os.environ["WHYLABS_DEFAULT_ANALYZER_ID"]
 MONITOR_BODY = {
     "id": MONITOR_ID, "analyzerIds": [ANALYZER_ID], 
     "schedule": {"type": "immediate"}, 

diff --git a/tests/monitor/manager/test_credentials.py b/tests/monitor/manager/test_credentials.py
@@ -12,12 +12,12 @@ def credentials() -> MonitorCredentials:
     )
 
 def test_credentials_org_id_match_env_var(credentials):
-    expected_org_id = os.environ["ORG_ID"]
+    expected_org_id = os.environ["WHYLABS_DEFAULT_ORG_ID"]
     assert expected_org_id == credentials.org_id
 
 def test_analyzer_id_derived_from_monitor_id(credentials):
     assert credentials.analyzer_id == f"{credentials.monitor_id}-analyzer"
 
 def test_gets_dataset_id_from_env_var_if_not_passed(credentials):
-    expected_dataset_id = os.environ["DATASET_ID"]
+    expected_dataset_id = os.environ["WHYLABS_DEFAULT_DATASET_ID"]
     assert expected_dataset_id == credentials.dataset_id
diff --git a/tests/monitor/manager/test_manager.py b/tests/monitor/manager/test_manager.py
@@ -1,14 +1,12 @@
 import json
 import os
 from typing import Dict
-from unittest import TestCase
-from unittest.mock import call, MagicMock
 
 import pytest
 from jsonschema import ValidationError
 
 from whylabs_toolkit.monitor.manager import MonitorManager, MonitorSetup
-from whylabs_toolkit.monitor.models import *
+from whylabs_toolkit.monitor.models import GlobalAction
 from tests.helpers.test_monitor_helpers import BaseTestMonitor
 from whylabs_toolkit.helpers.monitor_helpers import get_monitor, get_analyzer_ids, get_monitor_config
 
@@ -39,19 +37,19 @@ def test_save(self, manager: MonitorManager) -> None:
         manager.save()
 
         monitor = get_monitor(
-            org_id=os.environ["ORG_ID"],
-            dataset_id=os.environ["DATASET_ID"],
-            monitor_id=os.environ["MONITOR_ID"]
+            org_id=os.environ["WHYLABS_DEFAULT_ORG_ID"],
+            dataset_id=os.environ["WHYLABS_DEFAULT_DATASET_ID"],
+            monitor_id=os.environ["WHYLABS_DEFAULT_MONITOR_ID"]
         )
 
         assert monitor is not None
         assert isinstance(monitor, Dict)
-        assert monitor.get("id") == os.environ["MONITOR_ID"]
+        assert monitor.get("id") == os.environ["WHYLABS_DEFAULT_MONITOR_ID"]
 
         assert get_analyzer_ids(
-            org_id=os.environ["ORG_ID"],
-            dataset_id=os.environ["DATASET_ID"],
-            monitor_id=os.environ["MONITOR_ID"]
+            org_id=os.environ["WHYLABS_DEFAULT_ORG_ID"],
+            dataset_id=os.environ["WHYLABS_DEFAULT_DATASET_ID"],
+            monitor_id=os.environ["WHYLABS_DEFAULT_MONITOR_ID"]
         )
 
     def test_monitor_running_eagerly(self, existing_monitor_setup: MonitorSetup) -> None:

diff --git a/tests/monitor/manager/test_monitor_setup.py b/tests/monitor/manager/test_monitor_setup.py
@@ -107,7 +107,7 @@ def test_existing_monitor_monitor_setup_with_id(self, existing_monitor_setup) ->
         assert isinstance(existing_monitor_setup.config, StddevConfig)
 
     def test_create_monitor_from_existing_monitor_id(self, existing_monitor_setup) -> None:
-        assert existing_monitor_setup.monitor.id == os.environ["MONITOR_ID"]
+        assert existing_monitor_setup.monitor.id == os.environ["WHYLABS_DEFAULT_MONITOR_ID"]
 
         new_credentials = MonitorCredentials(monitor_id="new_monitor_id")
 

diff --git a/whylabs_toolkit/cli/__init__.py b/whylabs_toolkit/cli/__init__.py
diff --git a/whylabs_toolkit/helpers/client.py b/whylabs_toolkit/helpers/client.py
@@ -4,7 +4,7 @@
 
 
 def create_client(config: Config = Config()) -> ApiClient:
-    client_config = Configuration(host=config.get_whylabs_host())
+    client_config = Configuration(host=config.get_whylabs_api_endpoint())
     client_config.api_key = {"ApiKeyAuth": config.get_whylabs_api_key()}
     client_config.discard_unknown_keys = True
     return ApiClient(client_config)
diff --git a/whylabs_toolkit/helpers/config.py b/whylabs_toolkit/helpers/config.py
@@ -7,43 +7,69 @@
 
 
 class ConfigVars(Enum):
-    ORG_ID = 1
-    DATASET_ID = 2
-    WHYLABS_API_KEY = 3
-    WHYLABS_HOST = "https://api.whylabsapp.com"
-    WHYLABS_PRIVATE_API_ENDPOINT = 5
+    WHYLABS_DEFAULT_ORG_ID = 1
+    WHYLABS_DEFAULT_DATASET_ID = 2
+    WHYLABS_API_ENDPOINT = "https://api.whylabsapp.com"
+    # keeping these three for backwards compatibility, but they should be removed in the future
+    ORG_ID = 3
+    DATASET_ID = 4
+    WHYLABS_API_KEY = 5
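+    # TODO: remove these two and favor only WHYLABS_API_ENDPOINT. NOTE(review): WHYLABS_HOST below reuses WHYLABS_API_ENDPOINT's value, so Enum treats it as an alias whose .name is "WHYLABS_API_ENDPOINT" -- confirm that lookups by env.name still read the WHYLABS_HOST variable.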
+    WHYLABS_HOST = WHYLABS_API_ENDPOINT
+    WHYLABS_PRIVATE_API_ENDPOINT = 6
 
 
 class Config:
     def get_whylabs_api_key(self) -> str:
         return Validations.require(ConfigVars.WHYLABS_API_KEY)
 
+    # TODO deprecate this method
     def get_whylabs_host(self) -> str:
-        _private_api_endpoint = Validations.get_or_default(ConfigVars.WHYLABS_PRIVATE_API_ENDPOINT)
+        logger.warning("this method will be deprecated in future releases. use get_whylabs_api_endpoint instead")
+        whylabs_host = Validations.get(ConfigVars.WHYLABS_HOST)
+        if whylabs_host is not None:
+            logger.warning("WHYLABS_HOST will be deprecated, use WHYLABS_API_ENDPOINT instead.")
+            return whylabs_host
+        return self.get_whylabs_api_endpoint()
+
+    def get_whylabs_api_endpoint(self) -> str:
+        _private_api_endpoint = Validations.get(ConfigVars.WHYLABS_PRIVATE_API_ENDPOINT)
         if _private_api_endpoint and isinstance(_private_api_endpoint, str):
-            logger.debug(f"Using private API endpoint: {_private_api_endpoint}")
+            logger.warning(f"Using private API endpoint: {_private_api_endpoint}. "
+                           f"WHYLABS_PRIVATE_API_ENDPOINT will be deprecated in the future. "
+                           f"You should use the WHYLABS_API_ENDPOINT for this purpose.")
             return _private_api_endpoint
-        return Validations.get_or_default(ConfigVars.WHYLABS_HOST)
+        return Validations.get_or_default(ConfigVars.WHYLABS_API_ENDPOINT)
 
     def get_default_org_id(self) -> str:
-        return Validations.require(ConfigVars.ORG_ID)
+        org_id = Validations.get(ConfigVars.WHYLABS_DEFAULT_ORG_ID) or Validations.get(ConfigVars.ORG_ID)
+        if org_id is None:
+            raise TypeError("You need to specify WHYLABS_DEFAULT_ORG_ID")
+        return org_id
 
     def get_default_dataset_id(self) -> str:
-        return Validations.require(ConfigVars.DATASET_ID)
+        dataset_id = Validations.get(ConfigVars.WHYLABS_DEFAULT_DATASET_ID) or Validations.get(ConfigVars.DATASET_ID)
+        if dataset_id is None:
+            raise TypeError("You need to specify WHYLABS_DEFAULT_DATASET_ID")
+        return dataset_id
 
 
 class UserConfig(Config):
-    def __init__(self, api_key: str, org_id: str, dataset_id: str, whylabs_host: str = ConfigVars.WHYLABS_HOST.value):
+    def __init__(self, api_key: str, org_id: str, dataset_id: str, whylabs_api_endpoint: str = ConfigVars.WHYLABS_API_ENDPOINT.value):
         self.api_key = api_key
-        self.whylabs_host = whylabs_host
+        self.whylabs_api_endpoint = whylabs_api_endpoint
+        self.whylabs_host = self.whylabs_api_endpoint
         self.org_id = org_id
         self.dataset_id = dataset_id
 
     def get_whylabs_api_key(self) -> str:
         return self.api_key
 
+    def get_whylabs_api_endpoint(self) -> str:
+        return self.whylabs_api_endpoint
+
     def get_whylabs_host(self) -> str:
-        return self.whylabs_host
+        return self.get_whylabs_api_endpoint()
 
     def get_default_org_id(self) -> str:
         return self.org_id
@@ -66,3 +92,7 @@ def get_or_default(env: ConfigVars) -> str:
         if not val:
             raise TypeError(f"No default value for {env.name}")
         return val
+
+    @staticmethod
+    def get(env: ConfigVars) -> str:
+        return os.getenv(env.name)
diff --git a/whylabs_toolkit/helpers/monitor_helpers.py b/whylabs_toolkit/helpers/monitor_helpers.py
@@ -40,7 +40,7 @@ def get_monitor(
     try:
         monitor = api.get_monitor(org_id=org_id, dataset_id=dataset_id, monitor_id=monitor_id)
         return monitor
-    except (NotFoundException):
+    except NotFoundException:
         logger.info(f"Didn't find a monitor with id {monitor_id} for {dataset_id}. Creating a new one...")
         return None
     except ForbiddenException as e:

diff --git a/whylabs_toolkit/monitor/diagnoser/helpers/utils.py b/whylabs_toolkit/monitor/diagnoser/helpers/utils.py
@@ -17,7 +17,7 @@ def get_monitor_diagnostics_api(config: Config = Config()) -> MonitorDiagnostics
     """
     return MonitorDiagnosticsApi(api_client=create_client(config=config))
 
-
+# TODO: this should no longer be required, but that still needs to be tested
 def env_setup(
     org_id: str, dataset_id: str, api_key: Optional[str] = None, whylabs_endpoint: Optional[str] = None
 ) -> None:

diff --git a/whylabs_toolkit/monitor/manager/README.md b/whylabs_toolkit/monitor/manager/README.md
@@ -10,11 +10,11 @@ The first step is to set your credentials to access WhyLabs. Define your environ
 ```python
 import os
 
-os.environ["ORG_ID"] = "org-id"
+os.environ["WHYLABS_DEFAULT_ORG_ID"] = "org-id"
 os.environ["WHYLABS_API_KEY"] = "api-key"
 
 # Option 1: set your dataset_id as an env var 
-os.environ["DATASET_ID"] = "dataset-id"
+os.environ["WHYLABS_DEFAULT_DATASET_ID"] = "dataset-id"
 ```
 
 ## Create a Monitor Setup