From d2980e69ef23ad8665d19fde15998a488eabffd2 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Mar 2024 08:36:15 +0100 Subject: [PATCH 01/27] refactor tests --- src/activity.py | 2 +- src/activity_monitor.py | 76 ++++++++++++++++++++++------------ tests/test_activity_monitor.py | 10 ++--- 3 files changed, 55 insertions(+), 33 deletions(-) diff --git a/src/activity.py b/src/activity.py index c8c7989..e7000d2 100644 --- a/src/activity.py +++ b/src/activity.py @@ -1,4 +1,4 @@ import requests -r = requests.get("http://localhost:19597") +r = requests.get("http://localhost:19597/activity") print(r.text) diff --git a/src/activity_monitor.py b/src/activity_monitor.py index 7b763ac..f422f35 100755 --- a/src/activity_monitor.py +++ b/src/activity_monitor.py @@ -3,6 +3,7 @@ import psutil import requests import time +import os from abc import abstractmethod from concurrent.futures import ThreadPoolExecutor, as_completed @@ -13,25 +14,43 @@ from typing import Final -_logger = logging.getLogger(__name__) +_KB: Final[int] = 1024 +_MB: Final[int] = 1024 * _KB +_GB: Final[int] = 1024 * _MB +_TB: Final[int] = 1024 * _GB +_ENV_VAR_PREFIX: Final[str] = "ACTIVITY_MONITOR_BUSY_THRESHOLD" + +# NOTE: using high thresholds to make service by default +# considered inactive. +# If the service owner does not change these, by lowering +# them to an adequate value the service will always be shut +# down as soon as the inactivity period is detected. +BUSY_USAGE_THRESHOLD_CPU: Final[float] = os.environ.get( + f"{_ENV_VAR_PREFIX}_CPU_PERCENT", 1000 +) +BUSY_USAGE_THRESHOLD_DISK_READ: Final[int] = os.environ.get( + f"{_ENV_VAR_PREFIX}_DISK_READ_BPS", 1 * _TB +) +BUSY_USAGE_THRESHOLD_DISK_WRITE: Final[int] = os.environ.get( + f"{_ENV_VAR_PREFIX}_DISK_WRITE_BPS", 1 * _TB +) +BUSY_USAGE_THRESHOLD_NETWORK_RECEIVED: Final[int] = os.environ.get( + f"{_ENV_VAR_PREFIX}_NETWORK_RECEIVE_BPS", 1 * _TB +) +BUSY_USAGE_THRESHOLD_NETWORK_SENT: Final[int] = os.environ.get( + f"{_ENV_VAR_PREFIX}_NETWORK_SENT__BPS", 1 * _TB +) LISTEN_PORT: Final[int] = 19597 - KERNEL_CHECK_INTERVAL_S: Final[float] = 5 CHECK_INTERVAL_S: Final[float] = 1 -THREAD_EXECUTOR_WORKERS: Final[int] = 10 - -_KB: Final[int] = 1024 +_THREAD_EXECUTOR_WORKERS: Final[int] = 10 -BUSY_USAGE_THRESHOLD_CPU: Final[float] = 0.5 # percent in range [0, 100] -BUSY_USAGE_THRESHOLD_DISK_READ: Final[int] = 0 -BUSY_USAGE_THRESHOLD_DISK_WRITE: Final[int] = 0 -BUSY_USAGE_THRESHOLD_NETWORK_RECEIVED: Final[int] = 1 * _KB -BUSY_USAGE_THRESHOLD_NETWORK_SENT: Final[int] = 1 * _KB +_logger = logging.getLogger(__name__) -# Utilities +############### Utils class AbstractIsBusyMonitor: def __init__(self, poll_interval: float) -> None: self._poll_interval: float = poll_interval @@ -39,7 +58,7 @@ def __init__(self, poll_interval: float) -> None: self._thread: Thread | None = None self.is_busy: bool = True - self.thread_executor = ThreadPoolExecutor(max_workers=THREAD_EXECUTOR_WORKERS) + self.thread_executor = ThreadPoolExecutor(max_workers=_THREAD_EXECUTOR_WORKERS) @abstractmethod def _check_if_busy(self) -> bool: @@ -105,7 +124,7 @@ def _get_sibling_processes() -> list[psutil.Process]: return [c for c in all_children if c.pid != current_process.pid] -# Monitors +############### Monitors class JupyterKernelMonitor(AbstractIsBusyMonitor): @@ -442,7 +461,14 @@ def stop(self) -> None: self._thread.join() -def _get_response_debug(activity_manager: ActivityManager) -> dict: +############### Http Server + + +def _get_activity_response(activity_manager: ActivityManager) -> dict: + return {"seconds_inactive": activity_manager.get_idle_seconds()} + + +def _get_debug_response(activity_manager: ActivityManager) -> dict: return { "seconds_inactive": activity_manager.get_idle_seconds(), "cpu_usage": { @@ -467,10 +493,6 @@ def _get_response_debug(activity_manager: ActivityManager) -> dict: } -def _get_response_root(activity_manager: ActivityManager) -> dict: - return {"seconds_inactive": activity_manager.get_idle_seconds()} - - class ServerState: pass @@ -488,17 +510,17 @@ def _send_response(self, code: int, data: dict) -> None: self.end_headers() self.wfile.write(json.dumps(data).encode("utf-8")) - def do_GET(self): - state = self.server.state + @property + def activity_manager(self) -> ActivityManager: + return self.server.state.activity_manager - if self.path == "/": # The root endpoint - self._send_response(200, _get_response_root(state.activity_manager)) - elif self.path == "/debug": # The debug endpoint - self._send_response(200, _get_response_debug(state.activity_manager)) + def do_GET(self): + if self.path == "/activity": + self._send_response(200, _get_activity_response(self.activity_manager)) + elif self.path == "/debug": + self._send_response(200, _get_debug_response(self.activity_manager)) else: # Handle case where the endpoint is not found - self._send_response( - 404, _get_response_debug({"error": "Resource not found"}) - ) + self._send_response(404, {"error": "Resource not found"}) def make_server(port: int) -> HTTPServerWithState: diff --git a/tests/test_activity_monitor.py b/tests/test_activity_monitor.py index 61962e2..51ee0f6 100644 --- a/tests/test_activity_monitor.py +++ b/tests/test_activity_monitor.py @@ -221,7 +221,7 @@ def _run_server_worker() -> None: stop=stop_after_delay(3), wait=wait_fixed(0.1), reraise=True ): with attempt: - result = requests.get(f"{server_url}/", timeout=1) + result = requests.get(f"{server_url}/activity", timeout=1) assert result.status_code == 200, result.text yield None @@ -230,12 +230,12 @@ def _run_server_worker() -> None: server.server_close() with pytest.raises(requests.exceptions.RequestException): - requests.get(f"{server_url}/", timeout=1) + requests.get(f"{server_url}/activity", timeout=1) @pytest.mark.parametrize("are_kernels_busy", [False]) async def test_http_server_ok(http_server: None, server_url: str): - result = requests.get(f"{server_url}/", timeout=5) + result = requests.get(f"{server_url}/activity", timeout=1) assert result.status_code == 200 @@ -270,7 +270,7 @@ async def test_activity_monitor_becomes_not_busy( ): with attempt: # check that all become not busy - result = requests.get(f"{server_url}/debug", timeout=5) + result = requests.get(f"{server_url}/debug", timeout=1) assert result.status_code == 200 debug_response = result.json() assert debug_response["cpu_usage"]["is_busy"] is False @@ -278,7 +278,7 @@ async def test_activity_monitor_becomes_not_busy( assert debug_response["kernel_monitor"]["is_busy"] is False assert debug_response["network_usage"]["is_busy"] is False - result = requests.get(f"{server_url}/", timeout=2) + result = requests.get(f"{server_url}/activity", timeout=1) assert result.status_code == 200 response = result.json() assert response["seconds_inactive"] > 0 From c8595f2c6c8c3937cc3ca525c60c0982aff83b85 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Mar 2024 08:43:39 +0100 Subject: [PATCH 02/27] trying different python versions --- .github/workflows/test_pr_merge.yaml | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test_pr_merge.yaml b/.github/workflows/test_pr_merge.yaml index 5b771d6..2762841 100644 --- a/.github/workflows/test_pr_merge.yaml +++ b/.github/workflows/test_pr_merge.yaml @@ -10,11 +10,18 @@ on: jobs: tests: + strategy: + matrix: + python-version: [3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 3.10, 3.11, 3.12] runs-on: ubuntu-latest steps: + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} - name: Checkout repo content uses: actions/checkout@v2 - - name: install dependencies + - name: Install dependencies run: ./scripts/ci_tests.sh install - - name: run tests - run: ./scripts/ci_tests.sh test \ No newline at end of file + - name: Run tests + run: ./scripts/ci_tests.sh test From d1468755903f5a905dde940c7c1e70ebfbe2055c Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Mar 2024 08:46:28 +0100 Subject: [PATCH 03/27] bumping version --- .github/workflows/test_pr_merge.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_pr_merge.yaml b/.github/workflows/test_pr_merge.yaml index 2762841..dd92440 100644 --- a/.github/workflows/test_pr_merge.yaml +++ b/.github/workflows/test_pr_merge.yaml @@ -16,7 +16,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Checkout repo content From 5d8d821bff47c871cbfc61a7ba2e0426ef42b0ee Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Mar 2024 09:05:38 +0100 Subject: [PATCH 04/27] updated requirements --- requirements/test.in | 4 +-- requirements/test.txt | 5 ---- scripts/install.sh | 15 ++++++----- tests/test_activity_monitor.py | 48 ++++++++++++++++------------------ 4 files changed, 31 insertions(+), 41 deletions(-) diff --git a/requirements/test.in b/requirements/test.in index 202a1b1..d7dc480 100644 --- a/requirements/test.in +++ b/requirements/test.in @@ -1,12 +1,10 @@ -# from jupyter +# required packages psutil -tornado # testing pytest -pytest-asyncio pytest-cov pytest-mock requests diff --git a/requirements/test.txt b/requirements/test.txt index 43fae43..f17b2de 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -25,11 +25,8 @@ psutil==5.9.8 pytest==8.1.1 # via # -r requirements/test.in - # pytest-asyncio # pytest-cov # pytest-mock -pytest-asyncio==0.23.6 - # via -r requirements/test.in pytest-cov==4.1.0 # via -r requirements/test.in pytest-mock==3.12.0 @@ -48,7 +45,5 @@ tomli==2.0.1 # via # coverage # pytest -tornado==6.4 - # via -r requirements/test.in urllib3==2.2.1 # via requests diff --git a/scripts/install.sh b/scripts/install.sh index d4f2a29..4f7386e 100644 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -15,7 +15,7 @@ IFS=$'\n\t' # Function to display usage information usage() { echo "Usage: $0 " - echo "Example: $0 v.0.0.9-debug" + echo "Example: $0 v.0.0.1" exit 1 } @@ -24,22 +24,23 @@ if [ $# -ne 1 ]; then usage fi -TAG=$1 -URL="https://github.com/ITISFoundation/service-activity-monitor/releases/download/$TAG/release_archive_$TAG.zip" # Download and install +TAG=$1 +URL="https://github.com/ITISFoundation/service-activity-monitor/releases/download/$TAG/release_archive_$TAG.zip" echo "Downloading release $TAG..." curl -sSL -o /tmp/release.zip "$URL" -echo "Extracting files..." +echo "Installing..." + +# python scripts mkdir -p /usr/local/bin/service-monitor unzip -q /tmp/release.zip -d /usr/local/bin/service-monitor +# requirements +pip install psutil -echo "Installing..." -# Here you can write your installation steps, for now let's just echo the installation is complete echo "Installation complete." # Cleanup rm /tmp/release.zip - echo "Done!" diff --git a/tests/test_activity_monitor.py b/tests/test_activity_monitor.py index 51ee0f6..e9b0091 100644 --- a/tests/test_activity_monitor.py +++ b/tests/test_activity_monitor.py @@ -1,8 +1,6 @@ -import asyncio import json import psutil import pytest -import pytest_asyncio import requests import requests_mock import threading @@ -10,7 +8,7 @@ from typing import Callable, Final, Iterable, TYPE_CHECKING from pytest_mock import MockFixture -from tenacity import AsyncRetrying +from tenacity import Retrying from tenacity.stop import stop_after_delay from tenacity.wait import wait_fixed from conftest import _ActivityGenerator @@ -24,8 +22,6 @@ allow_imports() import activity_monitor -pytestmark = pytest.mark.asyncio - @pytest.fixture def mock__get_sibling_processes( @@ -47,7 +43,7 @@ def _(pids: list[int]) -> None: return _ -async def test_cpu_usage_monitor_not_busy( +def test_cpu_usage_monitor_not_busy( socket_server: None, mock__get_sibling_processes: Callable[[list[int]], list[psutil.Process]], create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], @@ -56,7 +52,7 @@ async def test_cpu_usage_monitor_not_busy( mock__get_sibling_processes([activity_generator.get_pid()]) with activity_monitor.CPUUsageMonitor(1, busy_threshold=5) as cpu_usage_monitor: - async for attempt in AsyncRetrying( + for attempt in Retrying( stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True ): with attempt: @@ -64,7 +60,7 @@ async def test_cpu_usage_monitor_not_busy( assert cpu_usage_monitor.is_busy is False -async def test_cpu_usage_monitor_still_busy( +def test_cpu_usage_monitor_still_busy( socket_server: None, mock__get_sibling_processes: Callable[[list[int]], list[psutil.Process]], create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], @@ -74,14 +70,14 @@ async def test_cpu_usage_monitor_still_busy( with activity_monitor.CPUUsageMonitor(0.5, busy_threshold=5) as cpu_usage_monitor: # wait for monitor to trigger - await asyncio.sleep(1) + time.sleep(1) # must still result busy assert cpu_usage_monitor.total_cpu_usage > 0 assert cpu_usage_monitor.is_busy is True -async def test_disk_usage_monitor_not_busy( +def test_disk_usage_monitor_not_busy( socket_server: None, mock__get_sibling_processes: Callable[[list[int]], list[psutil.Process]], create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], @@ -92,7 +88,7 @@ async def test_disk_usage_monitor_not_busy( with activity_monitor.DiskUsageMonitor( 0.5, read_usage_threshold=0, write_usage_threshold=0 ) as disk_usage_monitor: - async for attempt in AsyncRetrying( + for attempt in Retrying( stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True ): with attempt: @@ -103,7 +99,7 @@ async def test_disk_usage_monitor_not_busy( assert disk_usage_monitor.is_busy is False -async def test_disk_usage_monitor_still_busy( +def test_disk_usage_monitor_still_busy( socket_server: None, mock__get_sibling_processes: Callable[[list[int]], list[psutil.Process]], create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], @@ -115,7 +111,7 @@ async def test_disk_usage_monitor_still_busy( 0.5, read_usage_threshold=0, write_usage_threshold=0 ) as disk_usage_monitor: # wait for monitor to trigger - await asyncio.sleep(1) + time.sleep(1) write_bytes = disk_usage_monitor.total_bytes_write # NOTE: due to os disk cache reading is not reliable not testing it assert write_bytes > 0 @@ -132,7 +128,7 @@ def mock_no_network_activity(mocker: MockFixture) -> None: ) -async def test_network_usage_monitor_not_busy( +def test_network_usage_monitor_not_busy( mock_no_network_activity: None, socket_server: None, mock__get_sibling_processes: Callable[[list[int]], list[psutil.Process]], @@ -144,7 +140,7 @@ async def test_network_usage_monitor_not_busy( with activity_monitor.NetworkUsageMonitor( 0.5, received_usage_threshold=0, sent_usage_threshold=0 ) as network_usage_monitor: - async for attempt in AsyncRetrying( + for attempt in Retrying( stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True ): with attempt: @@ -159,7 +155,7 @@ def mock_network_monitor_exclude_interfaces(mocker: MockFixture) -> None: assert activity_monitor.NetworkUsageMonitor._EXCLUDE_INTERFACES == set() -async def test_network_usage_monitor_still_busy( +def test_network_usage_monitor_still_busy( mock_network_monitor_exclude_interfaces: None, socket_server: None, mock__get_sibling_processes: Callable[[list[int]], list[psutil.Process]], @@ -172,7 +168,7 @@ async def test_network_usage_monitor_still_busy( 0.5, received_usage_threshold=0, sent_usage_threshold=0 ) as network_usage_monitor: # wait for monitor to trigger - await asyncio.sleep(1) + time.sleep(1) assert network_usage_monitor.bytes_received > 0 assert network_usage_monitor.bytes_sent > 0 @@ -193,7 +189,7 @@ def mock_jupyter_kernel_monitor(are_kernels_busy: bool) -> Iterable[None]: @pytest.mark.parametrize("are_kernels_busy", [True, False]) -async def test_jupyter_kernel_monitor( +def test_jupyter_kernel_monitor( mock_jupyter_kernel_monitor: None, are_kernels_busy: bool ): kernel_monitor = activity_monitor.JupyterKernelMonitor(1) @@ -201,13 +197,13 @@ async def test_jupyter_kernel_monitor( assert kernel_monitor.are_kernels_busy is are_kernels_busy -@pytest_asyncio.fixture -async def server_url() -> str: +@pytest.fixture +def server_url() -> str: return f"http://localhost:{activity_monitor.LISTEN_PORT}" -@pytest_asyncio.fixture -async def http_server(mock_jupyter_kernel_monitor: None, server_url: str) -> None: +@pytest.fixture +def http_server(mock_jupyter_kernel_monitor: None, server_url: str) -> None: server = activity_monitor.make_server(activity_monitor.LISTEN_PORT) def _run_server_worker() -> None: @@ -217,7 +213,7 @@ def _run_server_worker() -> None: thread.start() # ensure server is running - async for attempt in AsyncRetrying( + for attempt in Retrying( stop=stop_after_delay(3), wait=wait_fixed(0.1), reraise=True ): with attempt: @@ -234,7 +230,7 @@ def _run_server_worker() -> None: @pytest.mark.parametrize("are_kernels_busy", [False]) -async def test_http_server_ok(http_server: None, server_url: str): +def test_http_server_ok(http_server: None, server_url: str): result = requests.get(f"{server_url}/activity", timeout=1) assert result.status_code == 200 @@ -254,7 +250,7 @@ def mock_activity_manager_config(mocker: MockFixture) -> None: @pytest.mark.parametrize("are_kernels_busy", [False]) -async def test_activity_monitor_becomes_not_busy( +def test_activity_monitor_becomes_not_busy( mock_activity_manager_config: None, socket_server: None, mock__get_sibling_processes: Callable[[list[int]], list[psutil.Process]], @@ -265,7 +261,7 @@ async def test_activity_monitor_becomes_not_busy( activity_generator = create_activity_generator(network=False, cpu=False, disk=False) mock__get_sibling_processes([activity_generator.get_pid()]) - async for attempt in AsyncRetrying( + for attempt in Retrying( stop=stop_after_delay(10), wait=wait_fixed(0.1), reraise=True ): with attempt: From 9b7f3878107f5500c82b7cb78feaa14fc2cbd0dc Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Mar 2024 09:27:50 +0100 Subject: [PATCH 05/27] refactor --- src/activity_monitor.py | 157 ++++++++++++++++++++++++---------------- 1 file changed, 96 insertions(+), 61 deletions(-) diff --git a/src/activity_monitor.py b/src/activity_monitor.py index f422f35..cb691b7 100755 --- a/src/activity_monitor.py +++ b/src/activity_monitor.py @@ -11,7 +11,7 @@ from datetime import datetime from http.server import HTTPServer, BaseHTTPRequestHandler from threading import Thread -from typing import Final +from typing import Final, Any _KB: Final[int] = 1024 @@ -41,6 +41,21 @@ f"{_ENV_VAR_PREFIX}_NETWORK_SENT__BPS", 1 * _TB ) +# The following flags disable different monitors +DISABLE_JUPYTER_KERNEL_MONITOR: Final[bool] = ( + os.environ.get("DISABLE_JUPYTER_KERNEL_MONITOR", None) is not None +) +DISABLE_CPU_USAGE_MONITOR: Final[bool] = ( + os.environ.get("DISABLE_CPU_USAGE_MONITOR", None) is not None +) +DISABLE_DISK_USAGE_MONITOR: Final[bool] = ( + os.environ.get("DISABLE_DISK_USAGE_MONITOR", None) is not None +) +DISABLE_NETWORK_USAGE_MONITOR: Final[bool] = ( + os.environ.get("DISABLE_NETWORK_USAGE_MONITOR", None) is not None +) +# Internals + LISTEN_PORT: Final[int] = 19597 KERNEL_CHECK_INTERVAL_S: Final[float] = 5 @@ -69,6 +84,14 @@ def _check_if_busy(self) -> bool: bool: True if considered busy """ + @abstractmethod + def get_debug_entry(self) -> dict[str, Any]: + """Information about the current internal state to be exported + + Returns: + dict[str, Any]: json serializable data + """ + def _worker(self) -> None: while self._keep_running: try: @@ -157,6 +180,9 @@ def _check_if_busy(self) -> bool: self._update_kernels_activity() return self.are_kernels_busy + def get_debug_entry(self) -> dict[str, Any]: + return {"kernel_monitor": {"is_busy": self.is_busy}} + ProcessID = int TimeSeconds = float @@ -224,6 +250,11 @@ def _check_if_busy(self) -> bool: self._update_total_cpu_usage() return self.total_cpu_usage > self.busy_threshold + def get_debug_entry(self) -> dict[str, Any]: + return { + "cpu_usage": {"is_busy": self.is_busy, "total": self.total_cpu_usage}, + } + BytesRead = int BytesWrite = int @@ -307,6 +338,17 @@ def _check_if_busy(self) -> bool: or self.total_bytes_write > self.write_usage_threshold ) + def get_debug_entry(self) -> dict[str, Any]: + return { + "disk_usage": { + "is_busy": self.is_busy, + "total": { + "bytes_read_per_second": self.total_bytes_read, + "bytes_write_per_second": self.total_bytes_write, + }, + } + } + InterfaceName = str BytesReceived = int @@ -386,6 +428,17 @@ def _check_if_busy(self) -> bool: or self.bytes_sent > self.sent_usage_threshold ) + def get_debug_entry(self) -> dict[str, Any]: + return { + "network_usage": { + "is_busy": self.is_busy, + "total": { + "bytes_received_per_second": self.bytes_received, + "bytes_sent_per_second": self.bytes_sent, + }, + } + } + class ActivityManager: def __init__(self, interval: float) -> None: @@ -395,29 +448,36 @@ def __init__(self, interval: float) -> None: self.interval = interval self.last_idle: datetime | None = None - self.jupyter_kernel_monitor = JupyterKernelMonitor(KERNEL_CHECK_INTERVAL_S) - self.cpu_usage_monitor = CPUUsageMonitor( - CHECK_INTERVAL_S, - busy_threshold=BUSY_USAGE_THRESHOLD_CPU, - ) - self.disk_usage_monitor = DiskUsageMonitor( - CHECK_INTERVAL_S, - read_usage_threshold=BUSY_USAGE_THRESHOLD_DISK_READ, - write_usage_threshold=BUSY_USAGE_THRESHOLD_DISK_WRITE, - ) - self.network_monitor = NetworkUsageMonitor( - CHECK_INTERVAL_S, - received_usage_threshold=BUSY_USAGE_THRESHOLD_NETWORK_RECEIVED, - sent_usage_threshold=BUSY_USAGE_THRESHOLD_NETWORK_SENT, - ) + self._monitors: list[AbstractIsBusyMonitor] = [] + + if not DISABLE_JUPYTER_KERNEL_MONITOR: + self._monitors.append(JupyterKernelMonitor(KERNEL_CHECK_INTERVAL_S)) + if not DISABLE_CPU_USAGE_MONITOR: + self._monitors.append( + CPUUsageMonitor( + CHECK_INTERVAL_S, + busy_threshold=BUSY_USAGE_THRESHOLD_CPU, + ) + ) + if not DISABLE_DISK_USAGE_MONITOR: + self._monitors.append( + DiskUsageMonitor( + CHECK_INTERVAL_S, + read_usage_threshold=BUSY_USAGE_THRESHOLD_DISK_READ, + write_usage_threshold=BUSY_USAGE_THRESHOLD_DISK_WRITE, + ) + ) + if not DISABLE_NETWORK_USAGE_MONITOR: + self._monitors.append( + NetworkUsageMonitor( + CHECK_INTERVAL_S, + received_usage_threshold=BUSY_USAGE_THRESHOLD_NETWORK_RECEIVED, + sent_usage_threshold=BUSY_USAGE_THRESHOLD_NETWORK_SENT, + ) + ) def check(self): - is_busy = ( - self.jupyter_kernel_monitor.is_busy - or self.cpu_usage_monitor.is_busy - or self.disk_usage_monitor.is_busy - or self.network_monitor.is_busy - ) + is_busy = any(x.is_busy for x in self._monitors) if is_busy: self.last_idle = None @@ -432,6 +492,12 @@ def get_idle_seconds(self) -> float: idle_seconds = (datetime.utcnow() - self.last_idle).total_seconds() return idle_seconds if idle_seconds > 0 else 0 + def get_debug(self) -> dict[str, Any]: + merged_dict: dict[str, Any] = {} + for x in self._monitors: + merged_dict.update(x.get_debug_entry()) + return merged_dict + def _worker(self) -> None: while self._keep_running: with suppress(Exception): @@ -439,10 +505,8 @@ def _worker(self) -> None: time.sleep(self.interval) def start(self) -> None: - self.jupyter_kernel_monitor.start() - self.cpu_usage_monitor.start() - self.disk_usage_monitor.start() - self.network_monitor.start() + for monitor in self._monitors: + monitor.start() self._thread = Thread( target=self._worker, @@ -452,10 +516,8 @@ def start(self) -> None: self._thread.start() def stop(self) -> None: - self.jupyter_kernel_monitor.stop() - self.cpu_usage_monitor.stop() - self.disk_usage_monitor.stop() - self.network_monitor.stop() + for monitor in self._monitors: + monitor.stop() self._keep_running = False self._thread.join() @@ -464,35 +526,6 @@ def stop(self) -> None: ############### Http Server -def _get_activity_response(activity_manager: ActivityManager) -> dict: - return {"seconds_inactive": activity_manager.get_idle_seconds()} - - -def _get_debug_response(activity_manager: ActivityManager) -> dict: - return { - "seconds_inactive": activity_manager.get_idle_seconds(), - "cpu_usage": { - "is_busy": activity_manager.cpu_usage_monitor.is_busy, - "total": activity_manager.cpu_usage_monitor.total_cpu_usage, - }, - "disk_usage": { - "is_busy": activity_manager.disk_usage_monitor.is_busy, - "total": { - "bytes_read_per_second": activity_manager.disk_usage_monitor.total_bytes_read, - "bytes_write_per_second": activity_manager.disk_usage_monitor.total_bytes_write, - }, - }, - "network_usage": { - "is_busy": activity_manager.network_monitor.is_busy, - "total": { - "bytes_received_per_second": activity_manager.network_monitor.bytes_received, - "bytes_sent_per_second": activity_manager.network_monitor.bytes_sent, - }, - }, - "kernel_monitor": {"is_busy": activity_manager.jupyter_kernel_monitor.is_busy}, - } - - class ServerState: pass @@ -516,9 +549,11 @@ def activity_manager(self) -> ActivityManager: def do_GET(self): if self.path == "/activity": - self._send_response(200, _get_activity_response(self.activity_manager)) + self._send_response( + 200, {"seconds_inactive": self.activity_manager.get_idle_seconds()} + ) elif self.path == "/debug": - self._send_response(200, _get_debug_response(self.activity_manager)) + self._send_response(200, self.activity_manager.get_debug()) else: # Handle case where the endpoint is not found self._send_response(404, {"error": "Resource not found"}) From e4c3f83b6725fbf5d2e2076a87ce27fbed237dfe Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Mar 2024 09:28:35 +0100 Subject: [PATCH 06/27] update notes --- src/activity_monitor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/activity_monitor.py b/src/activity_monitor.py index cb691b7..c27cf95 100755 --- a/src/activity_monitor.py +++ b/src/activity_monitor.py @@ -41,7 +41,7 @@ f"{_ENV_VAR_PREFIX}_NETWORK_SENT__BPS", 1 * _TB ) -# The following flags disable different monitors +# NOTE: set the following flags to disable a specific monitor DISABLE_JUPYTER_KERNEL_MONITOR: Final[bool] = ( os.environ.get("DISABLE_JUPYTER_KERNEL_MONITOR", None) is not None ) From 827bbd9b7dc4fe9a68e9c25e5bfd341bc75c53ff Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Mar 2024 09:28:49 +0100 Subject: [PATCH 07/27] refactor --- src/activity_monitor.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/activity_monitor.py b/src/activity_monitor.py index c27cf95..c342b18 100755 --- a/src/activity_monitor.py +++ b/src/activity_monitor.py @@ -54,9 +54,8 @@ DISABLE_NETWORK_USAGE_MONITOR: Final[bool] = ( os.environ.get("DISABLE_NETWORK_USAGE_MONITOR", None) is not None ) -# Internals - +# Internals LISTEN_PORT: Final[int] = 19597 KERNEL_CHECK_INTERVAL_S: Final[float] = 5 CHECK_INTERVAL_S: Final[float] = 1 From f31de94f1dbfdc4ab40229410a8e73fc42119ffc Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Mar 2024 09:29:40 +0100 Subject: [PATCH 08/27] refactor --- src/activity_monitor.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/activity_monitor.py b/src/activity_monitor.py index c342b18..572dd18 100755 --- a/src/activity_monitor.py +++ b/src/activity_monitor.py @@ -14,10 +14,7 @@ from typing import Final, Any -_KB: Final[int] = 1024 -_MB: Final[int] = 1024 * _KB -_GB: Final[int] = 1024 * _MB -_TB: Final[int] = 1024 * _GB +_TB: Final[int] = 1024 * 1024 * 1024 * 1024 _ENV_VAR_PREFIX: Final[str] = "ACTIVITY_MONITOR_BUSY_THRESHOLD" # NOTE: using high thresholds to make service by default From 1f4362e6337dbbb427b3d9dd15a9c0709cd942af Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Mar 2024 10:04:17 +0100 Subject: [PATCH 09/27] updated supported versions --- .github/workflows/test_pr_merge.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_pr_merge.yaml b/.github/workflows/test_pr_merge.yaml index dd92440..9a3238e 100644 --- a/.github/workflows/test_pr_merge.yaml +++ b/.github/workflows/test_pr_merge.yaml @@ -12,7 +12,7 @@ jobs: tests: strategy: matrix: - python-version: [3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 3.10, 3.11, 3.12] + python-version: [3.6, 3.7, 3.8, 3.9, 3.10, 3.11, 3.12] runs-on: ubuntu-latest steps: - name: Set up Python ${{ matrix.python-version }} From 3f925b818dad66b2fc0ea1f611663cee5421302e Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Mar 2024 13:09:14 +0100 Subject: [PATCH 10/27] expand env vars and tests --- src/activity.py | 15 +++++++-- src/activity_monitor.py | 56 ++++++++++++++++++------------- tests/_import_utils.py | 2 -- tests/conftest.py | 60 +++++++++++++++++++++++++++++++++- tests/test_activity.py | 23 +++++++++++++ tests/test_activity_monitor.py | 54 ++---------------------------- tests/test_scripts_cupling.py | 25 ++++++++++++++ 7 files changed, 156 insertions(+), 79 deletions(-) create mode 100644 tests/test_activity.py create mode 100644 tests/test_scripts_cupling.py diff --git a/src/activity.py b/src/activity.py index e7000d2..17acbd0 100644 --- a/src/activity.py +++ b/src/activity.py @@ -1,4 +1,15 @@ +import os + +from typing import Final import requests -r = requests.get("http://localhost:19597/activity") -print(r.text) +LISTEN_PORT: Final[int] = int(os.environ.get("ACTIVITY_MONITOR_LISTEN_PORT", 19597)) + + +def main(): + response = requests.get(f"http://localhost:{LISTEN_PORT}/activity") + print(response.text) + + +if __name__ == "__main__": + main() diff --git a/src/activity_monitor.py b/src/activity_monitor.py index 572dd18..e5f6107 100755 --- a/src/activity_monitor.py +++ b/src/activity_monitor.py @@ -15,47 +15,58 @@ _TB: Final[int] = 1024 * 1024 * 1024 * 1024 -_ENV_VAR_PREFIX: Final[str] = "ACTIVITY_MONITOR_BUSY_THRESHOLD" +_ENV_VAR_PREFIX: Final[str] = "ACTIVITY_MONITOR" # NOTE: using high thresholds to make service by default # considered inactive. # If the service owner does not change these, by lowering # them to an adequate value the service will always be shut # down as soon as the inactivity period is detected. +_THRESHOLD_PREFIX: Final[str] = f"{_ENV_VAR_PREFIX}_BUSY_THRESHOLD" BUSY_USAGE_THRESHOLD_CPU: Final[float] = os.environ.get( - f"{_ENV_VAR_PREFIX}_CPU_PERCENT", 1000 + f"{_THRESHOLD_PREFIX}_CPU_PERCENT", 1000 ) BUSY_USAGE_THRESHOLD_DISK_READ: Final[int] = os.environ.get( - f"{_ENV_VAR_PREFIX}_DISK_READ_BPS", 1 * _TB + f"{_THRESHOLD_PREFIX}_DISK_READ_BPS", 1 * _TB ) BUSY_USAGE_THRESHOLD_DISK_WRITE: Final[int] = os.environ.get( - f"{_ENV_VAR_PREFIX}_DISK_WRITE_BPS", 1 * _TB + f"{_THRESHOLD_PREFIX}_DISK_WRITE_BPS", 1 * _TB ) BUSY_USAGE_THRESHOLD_NETWORK_RECEIVED: Final[int] = os.environ.get( - f"{_ENV_VAR_PREFIX}_NETWORK_RECEIVE_BPS", 1 * _TB + f"{_THRESHOLD_PREFIX}_NETWORK_RECEIVE_BPS", 1 * _TB ) BUSY_USAGE_THRESHOLD_NETWORK_SENT: Final[int] = os.environ.get( - f"{_ENV_VAR_PREFIX}_NETWORK_SENT__BPS", 1 * _TB + f"{_THRESHOLD_PREFIX}_NETWORK_SENT__BPS", 1 * _TB ) # NOTE: set the following flags to disable a specific monitor DISABLE_JUPYTER_KERNEL_MONITOR: Final[bool] = ( - os.environ.get("DISABLE_JUPYTER_KERNEL_MONITOR", None) is not None + os.environ.get(f"{_ENV_VAR_PREFIX}_DISABLE_JUPYTER_KERNEL_MONITOR", None) + is not None ) DISABLE_CPU_USAGE_MONITOR: Final[bool] = ( - os.environ.get("DISABLE_CPU_USAGE_MONITOR", None) is not None + os.environ.get(f"{_ENV_VAR_PREFIX}_DISABLE_CPU_USAGE_MONITOR", None) is not None ) DISABLE_DISK_USAGE_MONITOR: Final[bool] = ( - os.environ.get("DISABLE_DISK_USAGE_MONITOR", None) is not None + os.environ.get(f"{_ENV_VAR_PREFIX}_DISABLE_DISK_USAGE_MONITOR", None) is not None ) DISABLE_NETWORK_USAGE_MONITOR: Final[bool] = ( - os.environ.get("DISABLE_NETWORK_USAGE_MONITOR", None) is not None + os.environ.get(f"{_ENV_VAR_PREFIX}_DISABLE_NETWORK_USAGE_MONITOR", None) is not None ) +# NOTE: Other configuration options +JUPYTER_NOTEBOOK_BASE_URL: Final[str] = os.environ.get( + f"{_ENV_VAR_PREFIX}_JUPYTER_NOTEBOOK_BASE_URL", "http://localhost:8888" +) +JUPYTER_NOTEBOOK_KERNEL_CHECK_INTERVAL_S: Final[float] = float( + os.environ.get(f"{_ENV_VAR_PREFIX}_JUPYTER_NOTEBOOK_KERNEL_CHECK_INTERVAL_S", 5) +) +MONITOR_INTERVAL_S: Final[float] = float( + os.environ.get(f"{_ENV_VAR_PREFIX}_MONITOR_INTERVAL_S", 1) +) +LISTEN_PORT: Final[int] = int(os.environ.get(f"{_ENV_VAR_PREFIX}_LISTEN_PORT", 19597)) + # Internals -LISTEN_PORT: Final[int] = 19597 -KERNEL_CHECK_INTERVAL_S: Final[float] = 5 -CHECK_INTERVAL_S: Final[float] = 1 _THREAD_EXECUTOR_WORKERS: Final[int] = 10 _logger = logging.getLogger(__name__) @@ -147,15 +158,14 @@ def _get_sibling_processes() -> list[psutil.Process]: class JupyterKernelMonitor(AbstractIsBusyMonitor): - BASE_URL = "http://localhost:8888" - HEADERS = {"accept": "application/json"} - def __init__(self, poll_interval: float) -> None: super().__init__(poll_interval=poll_interval) self.are_kernels_busy: bool = False def _get(self, path: str) -> dict: - r = requests.get(f"{self.BASE_URL}{path}", headers=self.HEADERS) + r = requests.get( + f"{JUPYTER_NOTEBOOK_BASE_URL}{path}", headers={"accept": "application/json"} + ) return r.json() def _update_kernels_activity(self) -> None: @@ -447,18 +457,20 @@ def __init__(self, interval: float) -> None: self._monitors: list[AbstractIsBusyMonitor] = [] if not DISABLE_JUPYTER_KERNEL_MONITOR: - self._monitors.append(JupyterKernelMonitor(KERNEL_CHECK_INTERVAL_S)) + self._monitors.append( + JupyterKernelMonitor(JUPYTER_NOTEBOOK_KERNEL_CHECK_INTERVAL_S) + ) if not DISABLE_CPU_USAGE_MONITOR: self._monitors.append( CPUUsageMonitor( - CHECK_INTERVAL_S, + MONITOR_INTERVAL_S, busy_threshold=BUSY_USAGE_THRESHOLD_CPU, ) ) if not DISABLE_DISK_USAGE_MONITOR: self._monitors.append( DiskUsageMonitor( - CHECK_INTERVAL_S, + MONITOR_INTERVAL_S, read_usage_threshold=BUSY_USAGE_THRESHOLD_DISK_READ, write_usage_threshold=BUSY_USAGE_THRESHOLD_DISK_WRITE, ) @@ -466,7 +478,7 @@ def __init__(self, interval: float) -> None: if not DISABLE_NETWORK_USAGE_MONITOR: self._monitors.append( NetworkUsageMonitor( - CHECK_INTERVAL_S, + MONITOR_INTERVAL_S, received_usage_threshold=BUSY_USAGE_THRESHOLD_NETWORK_RECEIVED, sent_usage_threshold=BUSY_USAGE_THRESHOLD_NETWORK_SENT, ) @@ -556,7 +568,7 @@ def do_GET(self): def make_server(port: int) -> HTTPServerWithState: state = ServerState() - state.activity_manager = ActivityManager(CHECK_INTERVAL_S) + state.activity_manager = ActivityManager(MONITOR_INTERVAL_S) state.activity_manager.start() server_address = ("", port) # Listen on all interfaces, port 8000 diff --git a/tests/_import_utils.py b/tests/_import_utils.py index 0358140..e975136 100644 --- a/tests/_import_utils.py +++ b/tests/_import_utils.py @@ -10,5 +10,3 @@ def allow_imports() -> None: path = (_CURRENT_DIR / "..." / ".." / ".." / "src").absolute().resolve() assert path.exists() sys.path.append(f"{path}") - - import activity_monitor diff --git a/tests/conftest.py b/tests/conftest.py index 78984a4..72d4673 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,14 +1,27 @@ import ctypes import pytest import socket +import json import threading import time +import requests +import requests_mock from concurrent.futures import ThreadPoolExecutor, wait from multiprocessing import Array, Process from tempfile import NamedTemporaryFile +from tenacity import Retrying +from tenacity.stop import stop_after_delay +from tenacity.wait import wait_fixed +from typing import Callable, Final, Iterable, TYPE_CHECKING -from typing import Callable, Final, Iterable +if TYPE_CHECKING: + from ..docker import activity_monitor +else: + from _import_utils import allow_imports + + allow_imports() + import activity_monitor _LOCAL_LISTEN_PORT: Final[int] = 12345 @@ -133,3 +146,48 @@ def _(*, network: bool, cpu: bool, disk: bool) -> _ActivityGenerator: for instance in created: instance.stop() + + +@pytest.fixture +def mock_jupyter_kernel_monitor(are_kernels_busy: bool) -> Iterable[None]: + with requests_mock.Mocker(real_http=True) as m: + m.get("http://localhost:8888/api/kernels", text=json.dumps([{"id": "atest1"}])) + m.get( + "http://localhost:8888/api/kernels/atest1", + text=json.dumps( + {"execution_state": "running" if are_kernels_busy else "idle"} + ), + ) + yield + + +@pytest.fixture +def server_url() -> str: + return f"http://localhost:{activity_monitor.LISTEN_PORT}" + + +@pytest.fixture +def http_server(mock_jupyter_kernel_monitor: None, server_url: str) -> None: + server = activity_monitor.make_server(activity_monitor.LISTEN_PORT) + + def _run_server_worker() -> None: + server.serve_forever() + + thread = threading.Thread(target=_run_server_worker, daemon=True) + thread.start() + + # ensure server is running + for attempt in Retrying( + stop=stop_after_delay(3), wait=wait_fixed(0.1), reraise=True + ): + with attempt: + result = requests.get(f"{server_url}/activity", timeout=1) + assert result.status_code == 200, result.text + + yield None + + server.shutdown() + server.server_close() + + with pytest.raises(requests.exceptions.RequestException): + requests.get(f"{server_url}/activity", timeout=1) diff --git a/tests/test_activity.py b/tests/test_activity.py new file mode 100644 index 0000000..7e0c313 --- /dev/null +++ b/tests/test_activity.py @@ -0,0 +1,23 @@ +import pytest +import json +from typing import TYPE_CHECKING + + +if TYPE_CHECKING: + from ..docker import activity +else: + from _import_utils import allow_imports + + allow_imports() + import activity + + +@pytest.fixture +def are_kernels_busy() -> bool: + return True + + +def test_activity(http_server: None, capfd: pytest.CaptureFixture): + activity.main() + capture_result = capfd.readouterr() + assert json.loads(capture_result.out) == {"seconds_inactive": 0} diff --git a/tests/test_activity_monitor.py b/tests/test_activity_monitor.py index e9b0091..01b271d 100644 --- a/tests/test_activity_monitor.py +++ b/tests/test_activity_monitor.py @@ -175,19 +175,6 @@ def test_network_usage_monitor_still_busy( assert network_usage_monitor.is_busy is True -@pytest.fixture -def mock_jupyter_kernel_monitor(are_kernels_busy: bool) -> Iterable[None]: - with requests_mock.Mocker(real_http=True) as m: - m.get("http://localhost:8888/api/kernels", text=json.dumps([{"id": "atest1"}])) - m.get( - "http://localhost:8888/api/kernels/atest1", - text=json.dumps( - {"execution_state": "running" if are_kernels_busy else "idle"} - ), - ) - yield - - @pytest.mark.parametrize("are_kernels_busy", [True, False]) def test_jupyter_kernel_monitor( mock_jupyter_kernel_monitor: None, are_kernels_busy: bool @@ -197,38 +184,6 @@ def test_jupyter_kernel_monitor( assert kernel_monitor.are_kernels_busy is are_kernels_busy -@pytest.fixture -def server_url() -> str: - return f"http://localhost:{activity_monitor.LISTEN_PORT}" - - -@pytest.fixture -def http_server(mock_jupyter_kernel_monitor: None, server_url: str) -> None: - server = activity_monitor.make_server(activity_monitor.LISTEN_PORT) - - def _run_server_worker() -> None: - server.serve_forever() - - thread = threading.Thread(target=_run_server_worker, daemon=True) - thread.start() - - # ensure server is running - for attempt in Retrying( - stop=stop_after_delay(3), wait=wait_fixed(0.1), reraise=True - ): - with attempt: - result = requests.get(f"{server_url}/activity", timeout=1) - assert result.status_code == 200, result.text - - yield None - - server.shutdown() - server.server_close() - - with pytest.raises(requests.exceptions.RequestException): - requests.get(f"{server_url}/activity", timeout=1) - - @pytest.mark.parametrize("are_kernels_busy", [False]) def test_http_server_ok(http_server: None, server_url: str): result = requests.get(f"{server_url}/activity", timeout=1) @@ -240,13 +195,8 @@ def test_http_server_ok(http_server: None, server_url: str): @pytest.fixture def mock_activity_manager_config(mocker: MockFixture) -> None: - mocker.patch("activity_monitor.CHECK_INTERVAL_S", 1) - mocker.patch("activity_monitor.KERNEL_CHECK_INTERVAL_S", 1) - - mocker.patch( - "activity_monitor.BUSY_USAGE_THRESHOLD_NETWORK_RECEIVED", _BIG_THRESHOLD - ) - mocker.patch("activity_monitor.BUSY_USAGE_THRESHOLD_NETWORK_SENT", _BIG_THRESHOLD) + mocker.patch("activity_monitor.MONITOR_INTERVAL_S", 1) + mocker.patch("activity_monitor.JUPYTER_NOTEBOOK_KERNEL_CHECK_INTERVAL_S", 1) @pytest.mark.parametrize("are_kernels_busy", [False]) diff --git a/tests/test_scripts_cupling.py b/tests/test_scripts_cupling.py new file mode 100644 index 0000000..ab40af7 --- /dev/null +++ b/tests/test_scripts_cupling.py @@ -0,0 +1,25 @@ +import pytest +from importlib import reload + +from typing import TYPE_CHECKING + +if not TYPE_CHECKING: + from _import_utils import allow_imports + + allow_imports() + + +def test_same_listen_port(monkeypatch: pytest.MonkeyPatch): + import activity + import activity_monitor + + assert activity_monitor.LISTEN_PORT == activity.LISTEN_PORT + + mocked_port = 314 + monkeypatch.setenv("ACTIVITY_MONITOR_LISTEN_PORT", mocked_port) + + reload(activity) + reload(activity_monitor) + + assert activity_monitor.LISTEN_PORT == mocked_port + assert activity.LISTEN_PORT == mocked_port From 603cf50fb04d825a2595bb9e5c79acc916d761cb Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Mar 2024 13:09:39 +0100 Subject: [PATCH 11/27] refactor --- tests/test_activity_monitor.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/test_activity_monitor.py b/tests/test_activity_monitor.py index 01b271d..d73a400 100644 --- a/tests/test_activity_monitor.py +++ b/tests/test_activity_monitor.py @@ -1,12 +1,9 @@ -import json import psutil import pytest import requests -import requests_mock -import threading import time -from typing import Callable, Final, Iterable, TYPE_CHECKING +from typing import Callable, Final, TYPE_CHECKING from pytest_mock import MockFixture from tenacity import Retrying from tenacity.stop import stop_after_delay From bd3102ae0b9176fe3898e7f5d5d956c089e6b79e Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Mar 2024 13:19:49 +0100 Subject: [PATCH 12/27] refactor --- .github/workflows/test_pr_merge.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test_pr_merge.yaml b/.github/workflows/test_pr_merge.yaml index 9a3238e..91b426d 100644 --- a/.github/workflows/test_pr_merge.yaml +++ b/.github/workflows/test_pr_merge.yaml @@ -15,12 +15,11 @@ jobs: python-version: [3.6, 3.7, 3.8, 3.9, 3.10, 3.11, 3.12] runs-on: ubuntu-latest steps: + - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - - name: Checkout repo content - uses: actions/checkout@v2 - name: Install dependencies run: ./scripts/ci_tests.sh install - name: Run tests From ac5933268b00481efbc9716a9cb69b214863f930 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Mar 2024 13:22:05 +0100 Subject: [PATCH 13/27] refactor --- .github/workflows/test_pr_merge.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_pr_merge.yaml b/.github/workflows/test_pr_merge.yaml index 91b426d..1421de6 100644 --- a/.github/workflows/test_pr_merge.yaml +++ b/.github/workflows/test_pr_merge.yaml @@ -12,7 +12,7 @@ jobs: tests: strategy: matrix: - python-version: [3.6, 3.7, 3.8, 3.9, 3.10, 3.11, 3.12] + python-version: ["3.7", "3.8", "3.9", "3.10", " 3.11", "3.12"] runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 From f4aba30b077029f554e6b0185553a25578d05dfb Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Mar 2024 13:24:04 +0100 Subject: [PATCH 14/27] install none specific packages --- scripts/ci_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/ci_tests.sh b/scripts/ci_tests.sh index b5626c2..4207194 100755 --- a/scripts/ci_tests.sh +++ b/scripts/ci_tests.sh @@ -10,7 +10,7 @@ IFS=$'\n\t' install() { make .venv source .venv/bin/activate - make install-test + pip install -r requirements/test.in pip list --verbose } From bb68bdb919f708301a394163e1de856f39b5d4d8 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Mar 2024 13:25:33 +0100 Subject: [PATCH 15/27] try 3.6 support again --- .github/workflows/test_pr_merge.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_pr_merge.yaml b/.github/workflows/test_pr_merge.yaml index 1421de6..2026a22 100644 --- a/.github/workflows/test_pr_merge.yaml +++ b/.github/workflows/test_pr_merge.yaml @@ -12,7 +12,7 @@ jobs: tests: strategy: matrix: - python-version: ["3.7", "3.8", "3.9", "3.10", " 3.11", "3.12"] + python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", " 3.11", "3.12"] runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 From 45328c7e64a16e3e7fcc125480cc4c15d8f7599b Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Mar 2024 13:27:37 +0100 Subject: [PATCH 16/27] downgrading --- .github/workflows/test_pr_merge.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_pr_merge.yaml b/.github/workflows/test_pr_merge.yaml index 2026a22..934db18 100644 --- a/.github/workflows/test_pr_merge.yaml +++ b/.github/workflows/test_pr_merge.yaml @@ -13,7 +13,7 @@ jobs: strategy: matrix: python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", " 3.11", "3.12"] - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} From ae143fc2715d347b01b1132318c9c394b47065ab Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Mar 2024 13:38:57 +0100 Subject: [PATCH 17/27] fixed tests --- src/activity.py | 3 +- src/activity_monitor.py | 78 +++++++++++++++++----------------- tests/conftest.py | 4 +- tests/test_activity_monitor.py | 25 +++++------ 4 files changed, 53 insertions(+), 57 deletions(-) diff --git a/src/activity.py b/src/activity.py index 17acbd0..83058bd 100644 --- a/src/activity.py +++ b/src/activity.py @@ -1,9 +1,8 @@ import os -from typing import Final import requests -LISTEN_PORT: Final[int] = int(os.environ.get("ACTIVITY_MONITOR_LISTEN_PORT", 19597)) +LISTEN_PORT: int = int(os.environ.get("ACTIVITY_MONITOR_LISTEN_PORT", 19597)) def main(): diff --git a/src/activity_monitor.py b/src/activity_monitor.py index e5f6107..d964d00 100755 --- a/src/activity_monitor.py +++ b/src/activity_monitor.py @@ -11,63 +11,63 @@ from datetime import datetime from http.server import HTTPServer, BaseHTTPRequestHandler from threading import Thread -from typing import Final, Any +from typing import Any, Dict, List, Tuple, Set -_TB: Final[int] = 1024 * 1024 * 1024 * 1024 -_ENV_VAR_PREFIX: Final[str] = "ACTIVITY_MONITOR" +_TB: int = 1024 * 1024 * 1024 * 1024 +_ENV_VAR_PREFIX: str = "ACTIVITY_MONITOR" # NOTE: using high thresholds to make service by default # considered inactive. # If the service owner does not change these, by lowering # them to an adequate value the service will always be shut # down as soon as the inactivity period is detected. -_THRESHOLD_PREFIX: Final[str] = f"{_ENV_VAR_PREFIX}_BUSY_THRESHOLD" -BUSY_USAGE_THRESHOLD_CPU: Final[float] = os.environ.get( +_THRESHOLD_PREFIX: str = f"{_ENV_VAR_PREFIX}_BUSY_THRESHOLD" +BUSY_USAGE_THRESHOLD_CPU: float = os.environ.get( f"{_THRESHOLD_PREFIX}_CPU_PERCENT", 1000 ) -BUSY_USAGE_THRESHOLD_DISK_READ: Final[int] = os.environ.get( +BUSY_USAGE_THRESHOLD_DISK_READ: int = os.environ.get( f"{_THRESHOLD_PREFIX}_DISK_READ_BPS", 1 * _TB ) -BUSY_USAGE_THRESHOLD_DISK_WRITE: Final[int] = os.environ.get( +BUSY_USAGE_THRESHOLD_DISK_WRITE: int = os.environ.get( f"{_THRESHOLD_PREFIX}_DISK_WRITE_BPS", 1 * _TB ) -BUSY_USAGE_THRESHOLD_NETWORK_RECEIVED: Final[int] = os.environ.get( +BUSY_USAGE_THRESHOLD_NETWORK_RECEIVED: int = os.environ.get( f"{_THRESHOLD_PREFIX}_NETWORK_RECEIVE_BPS", 1 * _TB ) -BUSY_USAGE_THRESHOLD_NETWORK_SENT: Final[int] = os.environ.get( +BUSY_USAGE_THRESHOLD_NETWORK_SENT: int = os.environ.get( f"{_THRESHOLD_PREFIX}_NETWORK_SENT__BPS", 1 * _TB ) # NOTE: set the following flags to disable a specific monitor -DISABLE_JUPYTER_KERNEL_MONITOR: Final[bool] = ( +DISABLE_JUPYTER_KERNEL_MONITOR: bool = ( os.environ.get(f"{_ENV_VAR_PREFIX}_DISABLE_JUPYTER_KERNEL_MONITOR", None) is not None ) -DISABLE_CPU_USAGE_MONITOR: Final[bool] = ( +DISABLE_CPU_USAGE_MONITOR: bool = ( os.environ.get(f"{_ENV_VAR_PREFIX}_DISABLE_CPU_USAGE_MONITOR", None) is not None ) -DISABLE_DISK_USAGE_MONITOR: Final[bool] = ( +DISABLE_DISK_USAGE_MONITOR: bool = ( os.environ.get(f"{_ENV_VAR_PREFIX}_DISABLE_DISK_USAGE_MONITOR", None) is not None ) -DISABLE_NETWORK_USAGE_MONITOR: Final[bool] = ( +DISABLE_NETWORK_USAGE_MONITOR: bool = ( os.environ.get(f"{_ENV_VAR_PREFIX}_DISABLE_NETWORK_USAGE_MONITOR", None) is not None ) # NOTE: Other configuration options -JUPYTER_NOTEBOOK_BASE_URL: Final[str] = os.environ.get( +JUPYTER_NOTEBOOK_BASE_URL: str = os.environ.get( f"{_ENV_VAR_PREFIX}_JUPYTER_NOTEBOOK_BASE_URL", "http://localhost:8888" ) -JUPYTER_NOTEBOOK_KERNEL_CHECK_INTERVAL_S: Final[float] = float( +JUPYTER_NOTEBOOK_KERNEL_CHECK_INTERVAL_S: float = float( os.environ.get(f"{_ENV_VAR_PREFIX}_JUPYTER_NOTEBOOK_KERNEL_CHECK_INTERVAL_S", 5) ) -MONITOR_INTERVAL_S: Final[float] = float( +MONITOR_INTERVAL_S: float = float( os.environ.get(f"{_ENV_VAR_PREFIX}_MONITOR_INTERVAL_S", 1) ) -LISTEN_PORT: Final[int] = int(os.environ.get(f"{_ENV_VAR_PREFIX}_LISTEN_PORT", 19597)) +LISTEN_PORT: int = int(os.environ.get(f"{_ENV_VAR_PREFIX}_LISTEN_PORT", 19597)) # Internals -_THREAD_EXECUTOR_WORKERS: Final[int] = 10 +_THREAD_EXECUTOR_WORKERS: int = 10 _logger = logging.getLogger(__name__) @@ -92,7 +92,7 @@ def _check_if_busy(self) -> bool: """ @abstractmethod - def get_debug_entry(self) -> dict[str, Any]: + def get_debug_entry(self) -> Dict[str, Any]: """Information about the current internal state to be exported Returns: @@ -129,14 +129,14 @@ def __exit__(self, exc_type, exc_value, traceback): self.stop() -def __get_children_processes_recursive(pid) -> list[psutil.Process]: +def __get_children_processes_recursive(pid) -> List[psutil.Process]: try: return psutil.Process(pid).children(recursive=True) except psutil.NoSuchProcess: return [] -def _get_sibling_processes() -> list[psutil.Process]: +def _get_sibling_processes() -> List[psutil.Process]: # Returns the CPU usage of all processes except this one. # ASSUMPTIONS: # - `CURRENT_PROC` is a child of root process @@ -186,7 +186,7 @@ def _check_if_busy(self) -> bool: self._update_kernels_activity() return self.are_kernels_busy - def get_debug_entry(self) -> dict[str, Any]: + def get_debug_entry(self) -> Dict[str, Any]: return {"kernel_monitor": {"is_busy": self.is_busy}} @@ -213,13 +213,13 @@ def __init__(self, poll_interval: float, *, busy_threshold: float): @staticmethod def _sample_cpu_usage( process: psutil.Process, - ) -> tuple[ProcessID, tuple[TimeSeconds, PercentCPU]]: + ) -> Tuple[ProcessID, Tuple[TimeSeconds, PercentCPU]]: """returns: tuple[pid, tuple[time, percent_cpu_usage]]""" return (process.pid, (time.time(), process.cpu_percent())) def _sample_total_cpu_usage( self, - ) -> dict[ProcessID, tuple[TimeSeconds, PercentCPU]]: + ) -> Dict[ProcessID, Tuple[TimeSeconds, PercentCPU]]: futures = [ self.thread_executor.submit(self._sample_cpu_usage, p) for p in _get_sibling_processes() @@ -228,7 +228,7 @@ def _sample_total_cpu_usage( @staticmethod def _get_cpu_over_1_second( - last: tuple[TimeSeconds, PercentCPU], current: tuple[TimeSeconds, PercentCPU] + last: Tuple[TimeSeconds, PercentCPU], current: Tuple[TimeSeconds, PercentCPU] ) -> float: interval = current[0] - last[0] measured_cpu_in_interval = current[1] @@ -256,7 +256,7 @@ def _check_if_busy(self) -> bool: self._update_total_cpu_usage() return self.total_cpu_usage > self.busy_threshold - def get_debug_entry(self) -> dict[str, Any]: + def get_debug_entry(self) -> Dict[str, Any]: return { "cpu_usage": {"is_busy": self.is_busy, "total": self.total_cpu_usage}, } @@ -288,13 +288,13 @@ def __init__( @staticmethod def _sample_disk_usage( process: psutil.Process, - ) -> tuple[ProcessID, tuple[TimeSeconds, BytesRead, BytesWrite]]: + ) -> Tuple[ProcessID, Tuple[TimeSeconds, BytesRead, BytesWrite]]: counters = process.io_counters() return (process.pid, (time.time(), counters.read_bytes, counters.write_bytes)) def _sample_total_disk_usage( self, - ) -> dict[ProcessID, tuple[TimeSeconds, BytesRead, BytesWrite]]: + ) -> Dict[ProcessID, Tuple[TimeSeconds, BytesRead, BytesWrite]]: futures = [ self.thread_executor.submit(self._sample_disk_usage, p) for p in _get_sibling_processes() @@ -303,9 +303,9 @@ def _sample_total_disk_usage( @staticmethod def _get_bytes_over_one_second( - last: tuple[TimeSeconds, BytesRead, BytesWrite], - current: tuple[TimeSeconds, BytesRead, BytesWrite], - ) -> tuple[BytesRead, BytesWrite]: + last: Tuple[TimeSeconds, BytesRead, BytesWrite], + current: Tuple[TimeSeconds, BytesRead, BytesWrite], + ) -> Tuple[BytesRead, BytesWrite]: interval = current[0] - last[0] measured_bytes_read_in_interval = current[1] - last[1] measured_bytes_write_in_interval = current[2] - last[2] @@ -344,7 +344,7 @@ def _check_if_busy(self) -> bool: or self.total_bytes_write > self.write_usage_threshold ) - def get_debug_entry(self) -> dict[str, Any]: + def get_debug_entry(self) -> Dict[str, Any]: return { "disk_usage": { "is_busy": self.is_busy, @@ -362,7 +362,7 @@ def get_debug_entry(self) -> dict[str, Any]: class NetworkUsageMonitor(AbstractIsBusyMonitor): - _EXCLUDE_INTERFACES: set[InterfaceName] = { + _EXCLUDE_INTERFACES: Set[InterfaceName] = { "lo", } @@ -385,7 +385,7 @@ def __init__( def _sample_total_network_usage( self, - ) -> tuple[TimeSeconds, BytesReceived, BytesSent]: + ) -> Tuple[TimeSeconds, BytesReceived, BytesSent]: net_io_counters = psutil.net_io_counters(pernic=True) total_bytes_received: int = 0 @@ -401,9 +401,9 @@ def _sample_total_network_usage( @staticmethod def _get_bytes_over_one_second( - last: tuple[TimeSeconds, BytesReceived, BytesSent], - current: tuple[TimeSeconds, BytesReceived, BytesSent], - ) -> tuple[BytesReceived, BytesSent]: + last: Tuple[TimeSeconds, BytesReceived, BytesSent], + current: Tuple[TimeSeconds, BytesReceived, BytesSent], + ) -> Tuple[BytesReceived, BytesSent]: interval = current[0] - last[0] measured_bytes_received_in_interval = current[1] - last[1] measured_bytes_sent_in_interval = current[2] - last[2] @@ -434,7 +434,7 @@ def _check_if_busy(self) -> bool: or self.bytes_sent > self.sent_usage_threshold ) - def get_debug_entry(self) -> dict[str, Any]: + def get_debug_entry(self) -> Dict[str, Any]: return { "network_usage": { "is_busy": self.is_busy, @@ -500,7 +500,7 @@ def get_idle_seconds(self) -> float: idle_seconds = (datetime.utcnow() - self.last_idle).total_seconds() return idle_seconds if idle_seconds > 0 else 0 - def get_debug(self) -> dict[str, Any]: + def get_debug(self) -> Dict[str, Any]: merged_dict: dict[str, Any] = {} for x in self._monitors: merged_dict.update(x.get_debug_entry()) diff --git a/tests/conftest.py b/tests/conftest.py index 72d4673..d9c3de0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -13,7 +13,7 @@ from tenacity import Retrying from tenacity.stop import stop_after_delay from tenacity.wait import wait_fixed -from typing import Callable, Final, Iterable, TYPE_CHECKING +from typing import Callable, Iterable, TYPE_CHECKING if TYPE_CHECKING: from ..docker import activity_monitor @@ -24,7 +24,7 @@ import activity_monitor -_LOCAL_LISTEN_PORT: Final[int] = 12345 +_LOCAL_LISTEN_PORT: int = 12345 class _ListenSocketServer: diff --git a/tests/test_activity_monitor.py b/tests/test_activity_monitor.py index d73a400..f56a77d 100644 --- a/tests/test_activity_monitor.py +++ b/tests/test_activity_monitor.py @@ -3,7 +3,7 @@ import requests import time -from typing import Callable, Final, TYPE_CHECKING +from typing import Callable, List, Dict, TYPE_CHECKING from pytest_mock import MockFixture from tenacity import Retrying from tenacity.stop import stop_after_delay @@ -23,8 +23,8 @@ @pytest.fixture def mock__get_sibling_processes( mocker: MockFixture, -) -> Callable[[list[int]], list[psutil.Process]]: - def _get_processes(pids: list[int]) -> list[psutil.Process]: +) -> Callable[[List[int]], List[psutil.Process]]: + def _get_processes(pids: List[int]) -> List[psutil.Process]: results = [] for pid in pids: proc = psutil.Process(pid) @@ -32,7 +32,7 @@ def _get_processes(pids: list[int]) -> list[psutil.Process]: results.append(proc) return results - def _(pids: list[int]) -> None: + def _(pids: List[int]) -> None: mocker.patch( "activity_monitor._get_sibling_processes", return_value=_get_processes(pids) ) @@ -42,7 +42,7 @@ def _(pids: list[int]) -> None: def test_cpu_usage_monitor_not_busy( socket_server: None, - mock__get_sibling_processes: Callable[[list[int]], list[psutil.Process]], + mock__get_sibling_processes: Callable[[List[int]], List[psutil.Process]], create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], ): activity_generator = create_activity_generator(network=False, cpu=False, disk=False) @@ -59,7 +59,7 @@ def test_cpu_usage_monitor_not_busy( def test_cpu_usage_monitor_still_busy( socket_server: None, - mock__get_sibling_processes: Callable[[list[int]], list[psutil.Process]], + mock__get_sibling_processes: Callable[[List[int]], List[psutil.Process]], create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], ): activity_generator = create_activity_generator(network=False, cpu=True, disk=False) @@ -76,7 +76,7 @@ def test_cpu_usage_monitor_still_busy( def test_disk_usage_monitor_not_busy( socket_server: None, - mock__get_sibling_processes: Callable[[list[int]], list[psutil.Process]], + mock__get_sibling_processes: Callable[[List[int]], List[psutil.Process]], create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], ): activity_generator = create_activity_generator(network=False, cpu=False, disk=False) @@ -98,7 +98,7 @@ def test_disk_usage_monitor_not_busy( def test_disk_usage_monitor_still_busy( socket_server: None, - mock__get_sibling_processes: Callable[[list[int]], list[psutil.Process]], + mock__get_sibling_processes: Callable[[List[int]], List[psutil.Process]], create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], ): activity_generator = create_activity_generator(network=False, cpu=False, disk=True) @@ -128,7 +128,7 @@ def mock_no_network_activity(mocker: MockFixture) -> None: def test_network_usage_monitor_not_busy( mock_no_network_activity: None, socket_server: None, - mock__get_sibling_processes: Callable[[list[int]], list[psutil.Process]], + mock__get_sibling_processes: Callable[[List[int]], List[psutil.Process]], create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], ): activity_generator = create_activity_generator(network=False, cpu=False, disk=False) @@ -155,7 +155,7 @@ def mock_network_monitor_exclude_interfaces(mocker: MockFixture) -> None: def test_network_usage_monitor_still_busy( mock_network_monitor_exclude_interfaces: None, socket_server: None, - mock__get_sibling_processes: Callable[[list[int]], list[psutil.Process]], + mock__get_sibling_processes: Callable[[List[int]], List[psutil.Process]], create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], ): activity_generator = create_activity_generator(network=True, cpu=False, disk=False) @@ -187,9 +187,6 @@ def test_http_server_ok(http_server: None, server_url: str): assert result.status_code == 200 -_BIG_THRESHOLD: Final[int] = int(1e10) - - @pytest.fixture def mock_activity_manager_config(mocker: MockFixture) -> None: mocker.patch("activity_monitor.MONITOR_INTERVAL_S", 1) @@ -200,7 +197,7 @@ def mock_activity_manager_config(mocker: MockFixture) -> None: def test_activity_monitor_becomes_not_busy( mock_activity_manager_config: None, socket_server: None, - mock__get_sibling_processes: Callable[[list[int]], list[psutil.Process]], + mock__get_sibling_processes: Callable[[List[int]], List[psutil.Process]], create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], http_server: None, server_url: str, From c3a9d4498582854ff5830339d9259254b3e4d238 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Mar 2024 13:40:23 +0100 Subject: [PATCH 18/27] unpin dependencies --- Makefile | 7 ++----- requirements/test.txt | 49 ------------------------------------------- scripts/ci_tests.sh | 3 +-- 3 files changed, 3 insertions(+), 56 deletions(-) delete mode 100644 requirements/test.txt diff --git a/Makefile b/Makefile index 7c9b62d..f5f8b0d 100644 --- a/Makefile +++ b/Makefile @@ -20,14 +20,11 @@ devenv: .venv ## create a python virtual environment with tools to dev, run and @echo "To activate the virtual environment, run 'source $ Date: Tue, 26 Mar 2024 13:42:21 +0100 Subject: [PATCH 19/27] include activity in coverage --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f5f8b0d..81d01c3 100644 --- a/Makefile +++ b/Makefile @@ -32,7 +32,7 @@ tests-dev: ## run tests in development mode .PHONY: tests-ci tests-ci: ## run testds in the CI - .venv/bin/pytest -vvv --color=yes --cov-report term --cov=activity_monitor tests + .venv/bin/pytest -vvv --color=yes --cov-report term --cov=activity_monitor --cov=activity tests .PHONY: release From e0a10cd9d17d2fd0494d84169d73f9d3b8ac9e08 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Mar 2024 13:44:16 +0100 Subject: [PATCH 20/27] add py3.5 --- .github/workflows/test_pr_merge.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test_pr_merge.yaml b/.github/workflows/test_pr_merge.yaml index 934db18..192dfb8 100644 --- a/.github/workflows/test_pr_merge.yaml +++ b/.github/workflows/test_pr_merge.yaml @@ -12,7 +12,8 @@ jobs: tests: strategy: matrix: - python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", " 3.11", "3.12"] + python-version: + ["3.5", "3.6", "3.7", "3.8", "3.9", "3.10", " 3.11", "3.12"] runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v3 From 6570b4c69da5f0fbc700c6cbacc52ea342a4e704 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Mar 2024 13:45:37 +0100 Subject: [PATCH 21/27] revert --- .github/workflows/test_pr_merge.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/test_pr_merge.yaml b/.github/workflows/test_pr_merge.yaml index 192dfb8..934db18 100644 --- a/.github/workflows/test_pr_merge.yaml +++ b/.github/workflows/test_pr_merge.yaml @@ -12,8 +12,7 @@ jobs: tests: strategy: matrix: - python-version: - ["3.5", "3.6", "3.7", "3.8", "3.9", "3.10", " 3.11", "3.12"] + python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", " 3.11", "3.12"] runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v3 From 71fb3ec797eba85d590817088eb7dc1669ae1427 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Mar 2024 14:34:00 +0100 Subject: [PATCH 22/27] added metrics endpoint --- src/activity_monitor.py | 99 +++++++++++++++++++++++++++++----- tests/test_activity_monitor.py | 65 +++++++++++++++++++--- 2 files changed, 143 insertions(+), 21 deletions(-) diff --git a/src/activity_monitor.py b/src/activity_monitor.py index d964d00..fb9373b 100755 --- a/src/activity_monitor.py +++ b/src/activity_monitor.py @@ -11,7 +11,7 @@ from datetime import datetime from http.server import HTTPServer, BaseHTTPRequestHandler from threading import Thread -from typing import Any, Dict, List, Tuple, Set +from typing import Any, Dict, List, Tuple, Set, Union _TB: int = 1024 * 1024 * 1024 * 1024 @@ -73,12 +73,49 @@ ############### Utils + + +_METRICS_COUNTER_TEMPLATE: str = """ +# HELP {name} {help} +# TYPE {name} counter +{name} {value} +""" + + +MetricEntry = Dict[str, Union[str, int]] + + +class MetricsManager: + def __init__(self) -> None: + self._metrics: Dict[str, MetricEntry] = {} + + def register_metric( + self, name: str, *, help: str, initial_value: Union[int, float] + ) -> None: + self._metrics[name] = {"help": help, "value": initial_value} + + def inc_metric(self, name: str, value: Union[int, float]) -> None: + self._metrics[name]["value"] += value + + def format_metrics(self) -> str: + result = "" + + for name, metric_entry in self._metrics.items(): + entry = _METRICS_COUNTER_TEMPLATE.format( + name=name, help=metric_entry["help"], value=metric_entry["value"] + ) + result += f"\n{entry}\n" + + return result + + class AbstractIsBusyMonitor: - def __init__(self, poll_interval: float) -> None: + def __init__(self, poll_interval: float, metrics: MetricsManager) -> None: self._poll_interval: float = poll_interval self._keep_running: bool = True self._thread: Thread | None = None + self.metrics = metrics self.is_busy: bool = True self.thread_executor = ThreadPoolExecutor(max_workers=_THREAD_EXECUTOR_WORKERS) @@ -158,8 +195,8 @@ def _get_sibling_processes() -> List[psutil.Process]: class JupyterKernelMonitor(AbstractIsBusyMonitor): - def __init__(self, poll_interval: float) -> None: - super().__init__(poll_interval=poll_interval) + def __init__(self, poll_interval: float, metrics: MetricsManager) -> None: + super().__init__(poll_interval=poll_interval, metrics=metrics) self.are_kernels_busy: bool = False def _get(self, path: str) -> dict: @@ -200,8 +237,10 @@ class CPUUsageMonitor(AbstractIsBusyMonitor): and averages over 1 second. """ - def __init__(self, poll_interval: float, *, busy_threshold: float): - super().__init__(poll_interval=poll_interval) + def __init__( + self, poll_interval: float, metrics: MetricsManager, *, busy_threshold: float + ): + super().__init__(poll_interval=poll_interval, metrics=metrics) self.busy_threshold = busy_threshold # snapshot @@ -270,11 +309,12 @@ class DiskUsageMonitor(AbstractIsBusyMonitor): def __init__( self, poll_interval: float, + metrics: MetricsManager, *, read_usage_threshold: int, write_usage_threshold: int, ): - super().__init__(poll_interval=poll_interval) + super().__init__(poll_interval=poll_interval, metrics=metrics) self.read_usage_threshold = read_usage_threshold self.write_usage_threshold = write_usage_threshold @@ -369,11 +409,12 @@ class NetworkUsageMonitor(AbstractIsBusyMonitor): def __init__( self, poll_interval: float, + metrics: MetricsManager, *, received_usage_threshold: int, sent_usage_threshold: int, ): - super().__init__(poll_interval=poll_interval) + super().__init__(poll_interval=poll_interval, metrics=metrics) self.received_usage_threshold = received_usage_threshold self.sent_usage_threshold = sent_usage_threshold @@ -383,6 +424,17 @@ def __init__( self.bytes_received: BytesReceived = 0 self.bytes_sent: BytesSent = 0 + self.metrics.register_metric( + "network_bytes_received_total", + help="Total number of bytes received across all network interfaces.", + initial_value=0, + ) + self.metrics.register_metric( + "network_bytes_sent_total", + help="Total number of bytes sent across all network interfaces.", + initial_value=0, + ) + def _sample_total_network_usage( self, ) -> Tuple[TimeSeconds, BytesReceived, BytesSent]: @@ -427,6 +479,9 @@ def _update_total_network_usage(self) -> None: self.bytes_received = bytes_received self.bytes_sent = bytes_sent + self.metrics.inc_metric("network_bytes_received_total", bytes_received) + self.metrics.inc_metric("network_bytes_sent_total", bytes_sent) + def _check_if_busy(self) -> bool: self._update_total_network_usage() return ( @@ -456,14 +511,19 @@ def __init__(self, interval: float) -> None: self._monitors: list[AbstractIsBusyMonitor] = [] + self.metrics = MetricsManager() + if not DISABLE_JUPYTER_KERNEL_MONITOR: self._monitors.append( - JupyterKernelMonitor(JUPYTER_NOTEBOOK_KERNEL_CHECK_INTERVAL_S) + JupyterKernelMonitor( + JUPYTER_NOTEBOOK_KERNEL_CHECK_INTERVAL_S, self.metrics + ) ) if not DISABLE_CPU_USAGE_MONITOR: self._monitors.append( CPUUsageMonitor( MONITOR_INTERVAL_S, + self.metrics, busy_threshold=BUSY_USAGE_THRESHOLD_CPU, ) ) @@ -471,6 +531,7 @@ def __init__(self, interval: float) -> None: self._monitors.append( DiskUsageMonitor( MONITOR_INTERVAL_S, + self.metrics, read_usage_threshold=BUSY_USAGE_THRESHOLD_DISK_READ, write_usage_threshold=BUSY_USAGE_THRESHOLD_DISK_WRITE, ) @@ -479,6 +540,7 @@ def __init__(self, interval: float) -> None: self._monitors.append( NetworkUsageMonitor( MONITOR_INTERVAL_S, + self.metrics, received_usage_threshold=BUSY_USAGE_THRESHOLD_NETWORK_RECEIVED, sent_usage_threshold=BUSY_USAGE_THRESHOLD_NETWORK_SENT, ) @@ -506,6 +568,9 @@ def get_debug(self) -> Dict[str, Any]: merged_dict.update(x.get_debug_entry()) return merged_dict + def get_metrics(self) -> str: + return self.metrics.format_metrics() + def _worker(self) -> None: while self._keep_running: with suppress(Exception): @@ -545,25 +610,33 @@ def __init__(self, server_address, RequestHandlerClass, state): class JSONRequestHandler(BaseHTTPRequestHandler): - def _send_response(self, code: int, data: dict) -> None: + def _send_json(self, code: int, data: Any) -> None: self.send_response(code) self.send_header("Content-type", "application/json") self.end_headers() self.wfile.write(json.dumps(data).encode("utf-8")) + def _send_text(self, code: int, text: str) -> None: + self.send_response(code) + self.send_header("Content-type", "text/plain") + self.end_headers() + self.wfile.write(text.encode("utf-8")) + @property def activity_manager(self) -> ActivityManager: return self.server.state.activity_manager def do_GET(self): if self.path == "/activity": - self._send_response( + self._send_json( 200, {"seconds_inactive": self.activity_manager.get_idle_seconds()} ) elif self.path == "/debug": - self._send_response(200, self.activity_manager.get_debug()) + self._send_json(200, self.activity_manager.get_debug()) + elif self.path == "/metrics": + self._send_text(200, self.activity_manager.get_metrics()) else: # Handle case where the endpoint is not found - self._send_response(404, {"error": "Resource not found"}) + self._send_json(404, {"error": "Resource not found"}) def make_server(port: int) -> HTTPServerWithState: diff --git a/tests/test_activity_monitor.py b/tests/test_activity_monitor.py index f56a77d..9b49507 100644 --- a/tests/test_activity_monitor.py +++ b/tests/test_activity_monitor.py @@ -40,15 +40,23 @@ def _(pids: List[int]) -> None: return _ +@pytest.fixture +def metrics_manager() -> activity_monitor.MetricsManager: + return activity_monitor.MetricsManager() + + def test_cpu_usage_monitor_not_busy( socket_server: None, mock__get_sibling_processes: Callable[[List[int]], List[psutil.Process]], create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], + metrics_manager: activity_monitor.MetricsManager, ): activity_generator = create_activity_generator(network=False, cpu=False, disk=False) mock__get_sibling_processes([activity_generator.get_pid()]) - with activity_monitor.CPUUsageMonitor(1, busy_threshold=5) as cpu_usage_monitor: + with activity_monitor.CPUUsageMonitor( + 1, metrics_manager, busy_threshold=5 + ) as cpu_usage_monitor: for attempt in Retrying( stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True ): @@ -61,11 +69,14 @@ def test_cpu_usage_monitor_still_busy( socket_server: None, mock__get_sibling_processes: Callable[[List[int]], List[psutil.Process]], create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], + metrics_manager: activity_monitor.MetricsManager, ): activity_generator = create_activity_generator(network=False, cpu=True, disk=False) mock__get_sibling_processes([activity_generator.get_pid()]) - with activity_monitor.CPUUsageMonitor(0.5, busy_threshold=5) as cpu_usage_monitor: + with activity_monitor.CPUUsageMonitor( + 0.5, metrics_manager, busy_threshold=5 + ) as cpu_usage_monitor: # wait for monitor to trigger time.sleep(1) @@ -78,12 +89,13 @@ def test_disk_usage_monitor_not_busy( socket_server: None, mock__get_sibling_processes: Callable[[List[int]], List[psutil.Process]], create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], + metrics_manager: activity_monitor.MetricsManager, ): activity_generator = create_activity_generator(network=False, cpu=False, disk=False) mock__get_sibling_processes([activity_generator.get_pid()]) with activity_monitor.DiskUsageMonitor( - 0.5, read_usage_threshold=0, write_usage_threshold=0 + 0.5, metrics_manager, read_usage_threshold=0, write_usage_threshold=0 ) as disk_usage_monitor: for attempt in Retrying( stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True @@ -100,12 +112,13 @@ def test_disk_usage_monitor_still_busy( socket_server: None, mock__get_sibling_processes: Callable[[List[int]], List[psutil.Process]], create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], + metrics_manager: activity_monitor.MetricsManager, ): activity_generator = create_activity_generator(network=False, cpu=False, disk=True) mock__get_sibling_processes([activity_generator.get_pid()]) with activity_monitor.DiskUsageMonitor( - 0.5, read_usage_threshold=0, write_usage_threshold=0 + 0.5, metrics_manager, read_usage_threshold=0, write_usage_threshold=0 ) as disk_usage_monitor: # wait for monitor to trigger time.sleep(1) @@ -130,12 +143,13 @@ def test_network_usage_monitor_not_busy( socket_server: None, mock__get_sibling_processes: Callable[[List[int]], List[psutil.Process]], create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], + metrics_manager: activity_monitor.MetricsManager, ): activity_generator = create_activity_generator(network=False, cpu=False, disk=False) mock__get_sibling_processes([activity_generator.get_pid()]) with activity_monitor.NetworkUsageMonitor( - 0.5, received_usage_threshold=0, sent_usage_threshold=0 + 0.5, metrics_manager, received_usage_threshold=0, sent_usage_threshold=0 ) as network_usage_monitor: for attempt in Retrying( stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True @@ -157,12 +171,13 @@ def test_network_usage_monitor_still_busy( socket_server: None, mock__get_sibling_processes: Callable[[List[int]], List[psutil.Process]], create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], + metrics_manager: activity_monitor.MetricsManager, ): activity_generator = create_activity_generator(network=True, cpu=False, disk=False) mock__get_sibling_processes([activity_generator.get_pid()]) with activity_monitor.NetworkUsageMonitor( - 0.5, received_usage_threshold=0, sent_usage_threshold=0 + 0.5, metrics_manager, received_usage_threshold=0, sent_usage_threshold=0 ) as network_usage_monitor: # wait for monitor to trigger time.sleep(1) @@ -174,9 +189,11 @@ def test_network_usage_monitor_still_busy( @pytest.mark.parametrize("are_kernels_busy", [True, False]) def test_jupyter_kernel_monitor( - mock_jupyter_kernel_monitor: None, are_kernels_busy: bool + mock_jupyter_kernel_monitor: None, + are_kernels_busy: bool, + metrics_manager: activity_monitor.MetricsManager, ): - kernel_monitor = activity_monitor.JupyterKernelMonitor(1) + kernel_monitor = activity_monitor.JupyterKernelMonitor(1, metrics_manager) kernel_monitor._update_kernels_activity() assert kernel_monitor.are_kernels_busy is are_kernels_busy @@ -222,3 +239,35 @@ def test_activity_monitor_becomes_not_busy( assert result.status_code == 200 response = result.json() assert response["seconds_inactive"] > 0 + + +@pytest.mark.parametrize("are_kernels_busy", [False]) +def test_metrics_endpoint( + mock_activity_manager_config: None, + socket_server: None, + mock__get_sibling_processes: Callable[[List[int]], List[psutil.Process]], + create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], + http_server: None, + server_url: str, +): + activity_generator = create_activity_generator(network=False, cpu=False, disk=False) + mock__get_sibling_processes([activity_generator.get_pid()]) + + result = requests.get(f"{server_url}/metrics", timeout=1) + assert result.status_code == 200 + assert result.text.count("network_bytes_received_total") == 3 + assert result.text.count("network_bytes_sent_total") == 3 + + +def test_metric_manager(metrics_manager: activity_monitor.MetricsManager): + metrics_manager.register_metric("metric_name", help="metric_help", initial_value=0) + + total_count = 0 + for i in range(10): + total_count += i + + metrics_manager.inc_metric("metric_name", i) + formatted_metrics = metrics_manager.format_metrics() + assert "# HELP metric_name metric_help" in formatted_metrics + assert "# TYPE metric_name counter" in formatted_metrics + assert f"metric_name {total_count}" in formatted_metrics From 5b02a4dcdc18ad9cad57f4626fcbfe70e8896fa7 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Mar 2024 14:41:45 +0100 Subject: [PATCH 23/27] updated readme --- README.md | 152 +++++++++++++++++++++++++++++++++++++++- src/activity_monitor.py | 2 +- 2 files changed, 152 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e28928b..55c6836 100644 --- a/README.md +++ b/README.md @@ -1 +1,151 @@ -# service-activity-monitor \ No newline at end of file +# service-activity-monitor + +Tooling for monitoring processes activity inside a docker container. Depends on python and the well supported `psutil` package. + +Currently supported: +- child process cpu usage +- child process disk usage +- overall container network usage +- jupyter kernel activity + +# Quick-ish start + +## Step 1 + +Inside your `Dockerfile` add the following. Please replace the `TARGET_VERSION` and adjust all `BUSY_THRESHOLD` for your application. + +```Dockerfile +ARG ACTIVITY_MONITOR_VERSION=TARGET_VERSION + +# Detection thresholds for application +ENV ACTIVITY_MONITOR_BUSY_THRESHOLD_CPU_PERCENT=1000 +ENV ACTIVITY_MONITOR_BUSY_THRESHOLD_DISK_READ_BPS=1099511627776 +ENV ACTIVITY_MONITOR_BUSY_THRESHOLD_DISK_WRITE_BPS=1099511627776 +ENV ACTIVITY_MONITOR_BUSY_THRESHOLD_NETWORK_RECEIVE_BPS=1099511627776 +ENV ACTIVITY_MONITOR_BUSY_THRESHOLD_NETWORK_SENT__BPS=1099511627776 + +# install service activity monitor +RUN apt-get update && \ + apt-get install -y curl && \ + # install using curl + curl -sSL https://raw.githubusercontent.com/ITISFoundation/service-activity-monitor/main/scripts/install.sh | \ + bash -s -- ${ACTIVITY_MONITOR_VERSION} && \ + # cleanup and remove curl + apt-get purge -y --auto-remove curl && \ + rm -rf /var/lib/apt/lists/* +``` + +## Step 2 + +Inside your boot script before starting your application start something similar to + +```bash +python /usr/local/bin/service-monitor/activity_monitor.py & +``` + +In most cases something similar to the below will do the trick (don't forget to replace `USER`). + +```bash +exec gosu "$USER" python /usr/local/bin/service-monitor/activity_monitor.py & +``` + +## Step 3 + +Inside you image's label something similar to this should end up: + +```yaml +... +services: + ... + YOUR_SERVICE: + ... + build: + labels: + ... + simcore.service.callbacks-mapping: '{"inactivity": {"service": "container", + "command": ["python", "/usr/local/bin/service-monitor/activity.py"], "timeout": + 1.0}}' +``` +Note if your service defines it's own compose spec. `container` must be replaced with the name of the service where these are installed. + +In most cases you will easily configure this by adding the following to your `.osparc/service-name/runtime.yaml` file: + +```yaml +callbacks-mapping: + inactivity: + service: container + command: ["python", "/usr/local/bin/service-monitor/activity.py"] + timeout: 1 +``` +# Available configuration options + +##### The following flags disable the monitors. By default all the monitors are enabled. +- `ACTIVITY_MONITOR_DISABLE_JUPYTER_KERNEL_MONITOR` default=`False`: disables and does not configure the jupyter kernel monitor +- `ACTIVITY_MONITOR_DISABLE_CPU_USAGE_MONITOR` default=`False`: disables and does not configure the cpu usage monitor +- `ACTIVITY_MONITOR_DISABLE_DISK_USAGE_MONITOR` default=`False`: disables and does not configure the disk usage monitor +- `ACTIVITY_MONITOR_DISABLE_NETWORK_USAGE_MONITOR` default=`False`: disables and does not configure the network usage monitor + +##### All the following env vars are to be interpreted as follows: if the value is greater than (>) threshold, the corresponding manager will report busy. +- `ACTIVITY_MONITOR_BUSY_THRESHOLD_CPU_PERCENT` [percentage(%)], default=`1000`: used cpu usage monitor +- `ACTIVITY_MONITOR_BUSY_THRESHOLD_DISK_READ_BPS` [bytes], default=`1099511627776`: used by disk usage monitor +- `ACTIVITY_MONITOR_BUSY_THRESHOLD_DISK_WRITE_BPS` [bytes], default=`1099511627776`: used by disk usage monitor +- `ACTIVITY_MONITOR_BUSY_THRESHOLD_NETWORK_RECEIVE_BPS` [bytes], default=`1099511627776`: used by network usage monitor +- `ACTIVITY_MONITOR_BUSY_THRESHOLD_NETWORK_SENT__BPS` [bytes], default=`1099511627776`: used by network usage monitor + +##### Other: +- `ACTIVITY_MONITOR_JUPYTER_NOTEBOOK_BASE_URL` [str] default=`http://localhost:8888`: endpoint where the jupyter notebook is exposed +- `ACTIVITY_MONITOR_JUPYTER_NOTEBOOK_KERNEL_CHECK_INTERVAL_S` [float] default=`5`: used by the jupyter kernel monitor to update it's metrics +- `ACTIVITY_MONITOR_MONITOR_INTERVAL_S` [float] default=`1`: all other monitors us this interval to update their metrics +- `ACTIVITY_MONITOR_LISTEN_PORT` [int] default=`19597`: port on which the http server will be exposed + + + +# API + + +### `GET /activity` + +Used by oSPARC top retrieve the status of the service if it's active or not + +```json +{"seconds_inactive": 0} +``` + +```bash +curl http://localhost:19597/activity +``` + +### `GET /debug` + +Used for debugging and not used by oSPARC + +```json +{ + "kernel_monitor": {"is_busy": true}, + "cpu_usage": {"is_busy": false, "total": 0}, + "disk_usage": {"is_busy": false, "total": {"bytes_read_per_second": 0, "bytes_write_per_second": 0}}, + "network_usage": {"is_busy": false, "total": {"bytes_received_per_second": 345452, "bytes_sent_per_second": 343809}} +} +``` + +```bash +curl http://localhost:19597/debug +``` + +### `GET /metrics` + +Exposes Prometheus metrics relative to the running processes. + +``` +# HELP network_bytes_received_total Total number of bytes received across all network interfaces. +# TYPE network_bytes_received_total counter +network_bytes_received_total 23434790 + +# HELP network_bytes_sent_total Total number of bytes sent across all network interfaces. +# TYPE network_bytes_sent_total counter +network_bytes_sent_total 22893843 +``` + +```bash +curl http://localhost:19597/metrics +``` \ No newline at end of file diff --git a/src/activity_monitor.py b/src/activity_monitor.py index fb9373b..f70f21c 100755 --- a/src/activity_monitor.py +++ b/src/activity_monitor.py @@ -104,7 +104,7 @@ def format_metrics(self) -> str: entry = _METRICS_COUNTER_TEMPLATE.format( name=name, help=metric_entry["help"], value=metric_entry["value"] ) - result += f"\n{entry}\n" + result += f"{entry}" return result From 00ddadcd7d57868cc55e87ae36549b23b0713c27 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Mar 2024 14:43:07 +0100 Subject: [PATCH 24/27] refactor --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 55c6836..567b2b7 100644 --- a/README.md +++ b/README.md @@ -100,7 +100,7 @@ callbacks-mapping: -# API +# Exposed API ### `GET /activity` From 43aa3a5931c6b246b13098ae18f0345f3faf34de Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Mar 2024 14:48:49 +0100 Subject: [PATCH 25/27] update readme --- README.md | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 567b2b7..966ceb3 100644 --- a/README.md +++ b/README.md @@ -2,11 +2,15 @@ Tooling for monitoring processes activity inside a docker container. Depends on python and the well supported `psutil` package. -Currently supported: -- child process cpu usage -- child process disk usage -- overall container network usage -- jupyter kernel activity +Monitors: + - child process cpu usage + - child process disk usage + - overall container network usage + - jupyter kernel activity + +Exposes Prometheus metrics regarding: + - total outgoing network usage + - total incoming network usage # Quick-ish start From 42fc56ca055507687a0fe28eb7a26c3a3e3733e2 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Mar 2024 14:56:57 +0100 Subject: [PATCH 26/27] added missing --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 966ceb3..b71001e 100644 --- a/README.md +++ b/README.md @@ -75,6 +75,7 @@ Note if your service defines it's own compose spec. `container` must be replaced In most cases you will easily configure this by adding the following to your `.osparc/service-name/runtime.yaml` file: ```yaml +... callbacks-mapping: inactivity: service: container From 18841818cac12682caf2bac5fb79bcd7bd6cff8a Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Mar 2024 15:18:22 +0100 Subject: [PATCH 27/27] refactor --- src/activity_monitor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/activity_monitor.py b/src/activity_monitor.py index f70f21c..c0a6f8d 100755 --- a/src/activity_monitor.py +++ b/src/activity_monitor.py @@ -609,7 +609,7 @@ def __init__(self, server_address, RequestHandlerClass, state): super().__init__(server_address, RequestHandlerClass) -class JSONRequestHandler(BaseHTTPRequestHandler): +class MainRequestHandler(BaseHTTPRequestHandler): def _send_json(self, code: int, data: Any) -> None: self.send_response(code) self.send_header("Content-type", "application/json") @@ -645,7 +645,7 @@ def make_server(port: int) -> HTTPServerWithState: state.activity_manager.start() server_address = ("", port) # Listen on all interfaces, port 8000 - return HTTPServerWithState(server_address, JSONRequestHandler, state) + return HTTPServerWithState(server_address, MainRequestHandler, state) def main():