-
Notifications
You must be signed in to change notification settings - Fork 287
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Further improvements to pending kernels management #732
Changes from all commits
a877478
2b8790a
8076482
73ae431
5f97e4e
dfc1a85
54c86a0
94595af
de8d2c5
9bc967a
2d7626c
a20be02
da3e79d
5841560
a382498
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,7 +12,6 @@ jobs: | |
strategy: | ||
matrix: | ||
python-version: ["3.9"] | ||
|
||
steps: | ||
- name: Checkout | ||
uses: actions/checkout@v2 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,6 +24,7 @@ with Jupyter kernels. | |
kernels | ||
wrapperkernels | ||
provisioning | ||
pending-kernels | ||
|
||
.. toctree:: | ||
:maxdepth: 2 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
Pending Kernels | ||
=============== | ||
|
||
*Added in 7.1.0* | ||
|
||
In scenarios where a kernel takes a long time to start (e.g. kernels running remotely), it can be advantageous to immediately return the kernel's model and ID from key methods like ``.start_kernel()`` and ``.shutdown_kernel()``. The kernel will continue its task without blocking other managerial actions. | ||
|
||
This intermediate state is called a **"pending kernel"**. | ||
|
||
How they work | ||
------------- | ||
|
||
When ``.start_kernel()`` or ``.shutdown_kernel()`` is called, a ``Future`` is created under the ``KernelManager.ready`` property. This property can be awaited anytime to ensure that the kernel moves out of its pending state, e.g.: | ||
|
||
.. code-block:: python | ||
|
||
# await a Kernel Manager's `.ready` property to | ||
# block further action until the kernel is out | ||
# of its pending state. | ||
await kernel_manager.ready | ||
|
||
Once the kernel is finished pending, ``.ready.done()`` will be ``True`` and either 1) ``.ready.result()`` will return ``None`` or 2) ``.ready.exception()`` will return the raised exception. | ||
|
||
Using pending kernels | ||
--------------------- | ||
|
||
The most common way to interact with pending kernels is through the ``MultiKernelManager``—the object that manages a collection of kernels—by setting its ``use_pending_kernels`` trait to ``True``. Pending kernels are "opt-in"; they are not used by default in the ``MultiKernelManager``. | ||
|
||
When ``use_pending_kernels`` is ``True``, the following changes are made to the ``MultiKernelManager``: | ||
|
||
1. ``start_kernel`` and ``shutdown_kernel`` return immediately while running the pending task as a background ``asyncio`` task. | ||
2. The following methods raise a ``RuntimeError`` if a kernel is pending: | ||
* ``restart_kernel`` | ||
* ``interrupt_kernel`` | ||
* ``shutdown_kernel`` | ||
3. ``shutdown_all`` will wait for all pending kernels to become ready before attempting to shut them down. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -97,7 +97,12 @@ def create_kernel_manager(*args, **kwargs) -> KernelManager: | |
|
||
context = Instance("zmq.Context") | ||
|
||
_starting_kernels = Dict() | ||
_pending_kernels = Dict() | ||
|
||
@property | ||
def _starting_kernels(self): | ||
"""A shim for backwards compatibility.""" | ||
return self._pending_kernels | ||
Comment on lines
+102
to
+105
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this be marked as deprecated? |
||
|
||
@default("context") | ||
def _context_default(self) -> zmq.Context: | ||
|
@@ -165,7 +170,22 @@ async def _add_kernel_when_ready( | |
await kernel_awaitable | ||
self._kernels[kernel_id] = km | ||
finally: | ||
self._starting_kernels.pop(kernel_id, None) | ||
self._pending_kernels.pop(kernel_id, None) | ||
|
||
async def _remove_kernel_when_ready( | ||
self, kernel_id: str, kernel_awaitable: t.Awaitable | ||
) -> None: | ||
try: | ||
await kernel_awaitable | ||
self.remove_kernel(kernel_id) | ||
finally: | ||
self._pending_kernels.pop(kernel_id, None) | ||
|
||
def _using_pending_kernels(self): | ||
"""Returns a boolean; a clearer method for determining if | ||
this multikernelmanager is using pending kernels or not | ||
""" | ||
return getattr(self, 'use_pending_kernels', False) | ||
|
||
async def _async_start_kernel(self, kernel_name: t.Optional[str] = None, **kwargs) -> str: | ||
"""Start a new kernel. | ||
|
@@ -186,17 +206,38 @@ async def _async_start_kernel(self, kernel_name: t.Optional[str] = None, **kwarg | |
|
||
starter = ensure_async(km.start_kernel(**kwargs)) | ||
fut = asyncio.ensure_future(self._add_kernel_when_ready(kernel_id, km, starter)) | ||
self._starting_kernels[kernel_id] = fut | ||
|
||
if getattr(self, 'use_pending_kernels', False): | ||
self._pending_kernels[kernel_id] = fut | ||
# Handling a Pending Kernel | ||
if self._using_pending_kernels(): | ||
# If using pending kernels, do not block | ||
# on the kernel start. | ||
self._kernels[kernel_id] = km | ||
else: | ||
await fut | ||
# raise an exception if one occurred during kernel startup. | ||
if km.ready.exception(): | ||
raise km.ready.exception() # type: ignore | ||
|
||
return kernel_id | ||
|
||
start_kernel = run_sync(_async_start_kernel) | ||
|
||
async def _shutdown_kernel_when_ready( | ||
self, | ||
kernel_id: str, | ||
now: t.Optional[bool] = False, | ||
restart: t.Optional[bool] = False, | ||
) -> None: | ||
"""Wait for a pending kernel to be ready | ||
before shutting the kernel down. | ||
""" | ||
# Only do this if using pending kernels | ||
if self._using_pending_kernels(): | ||
kernel = self._kernels[kernel_id] | ||
await kernel.ready | ||
# Once out of a pending state, we can call shutdown. | ||
await ensure_async(self.shutdown_kernel(kernel_id, now=now, restart=restart)) | ||
|
||
async def _async_shutdown_kernel( | ||
self, | ||
kernel_id: str, | ||
|
@@ -215,15 +256,31 @@ async def _async_shutdown_kernel( | |
Will the kernel be restarted? | ||
""" | ||
self.log.info("Kernel shutdown: %s" % kernel_id) | ||
if kernel_id in self._starting_kernels: | ||
# If we're using pending kernels, block shutdown when a kernel is pending. | ||
if self._using_pending_kernels() and kernel_id in self._pending_kernels: | ||
raise RuntimeError("Kernel is in a pending state. Cannot shutdown.") | ||
# If the kernel is still starting, wait for it to be ready. | ||
elif kernel_id in self._starting_kernels: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is the use of Perhaps we could be more explicit (and save an existence check [nit]) via: if kernel_id in self._pending_kernels:
if self._using_pending_kernels():
raise RuntimeError("Kernel is in a pending state. Cannot shutdown.")
else: # kernel is still starting, wait for its startup
kernel = self._pending_kernels[kernel_id]
try:
await kernel
except Exception:
self.remove_kernel(kernel_id) |
||
kernel = self._starting_kernels[kernel_id] | ||
try: | ||
await self._starting_kernels[kernel_id] | ||
await kernel | ||
except Exception: | ||
self.remove_kernel(kernel_id) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we want to update the |
||
return | ||
km = self.get_kernel(kernel_id) | ||
await ensure_async(km.shutdown_kernel(now, restart)) | ||
self.remove_kernel(kernel_id) | ||
# If a pending kernel raised an exception, remove it. | ||
if km.ready.exception(): | ||
self.remove_kernel(kernel_id) | ||
return | ||
stopper = ensure_async(km.shutdown_kernel(now, restart)) | ||
fut = asyncio.ensure_future(self._remove_kernel_when_ready(kernel_id, stopper)) | ||
self._pending_kernels[kernel_id] = fut | ||
# Await the kernel if not using pending kernels. | ||
if not self._using_pending_kernels(): | ||
await fut | ||
# raise an exception if one occurred during kernel shutdown. | ||
if km.ready.exception(): | ||
raise km.ready.exception() # type: ignore | ||
|
||
shutdown_kernel = run_sync(_async_shutdown_kernel) | ||
|
||
|
@@ -258,13 +315,17 @@ def remove_kernel(self, kernel_id: str) -> KernelManager: | |
async def _async_shutdown_all(self, now: bool = False) -> None: | ||
"""Shutdown all kernels.""" | ||
kids = self.list_kernel_ids() | ||
kids += list(self._starting_kernels) | ||
futs = [ensure_async(self.shutdown_kernel(kid, now=now)) for kid in set(kids)] | ||
kids += list(self._pending_kernels) | ||
futs = [ensure_async(self._shutdown_kernel_when_ready(kid, now=now)) for kid in set(kids)] | ||
await asyncio.gather(*futs) | ||
# When using "shutdown all", all pending kernels | ||
# should be awaited before exiting this method. | ||
if self._using_pending_kernels(): | ||
for km in self._kernels.values(): | ||
await km.ready | ||
|
||
shutdown_all = run_sync(_async_shutdown_all) | ||
|
||
@kernel_method | ||
def interrupt_kernel(self, kernel_id: str) -> None: | ||
"""Interrupt (SIGINT) the kernel by its uuid. | ||
|
||
|
@@ -273,7 +334,12 @@ def interrupt_kernel(self, kernel_id: str) -> None: | |
kernel_id : uuid | ||
The id of the kernel to interrupt. | ||
""" | ||
kernel = self.get_kernel(kernel_id) | ||
if not kernel.ready.done(): | ||
raise RuntimeError("Kernel is in a pending state. Cannot interrupt.") | ||
out = kernel.interrupt_kernel() | ||
self.log.info("Kernel interrupted: %s" % kernel_id) | ||
return out | ||
|
||
@kernel_method | ||
def signal_kernel(self, kernel_id: str, signum: int) -> None: | ||
|
@@ -291,8 +357,7 @@ def signal_kernel(self, kernel_id: str, signum: int) -> None: | |
""" | ||
self.log.info("Signaled Kernel %s with %s" % (kernel_id, signum)) | ||
|
||
@kernel_method | ||
def restart_kernel(self, kernel_id: str, now: bool = False) -> None: | ||
async def _async_restart_kernel(self, kernel_id: str, now: bool = False) -> None: | ||
"""Restart a kernel by its uuid, keeping the same ports. | ||
|
||
Parameters | ||
|
@@ -307,7 +372,15 @@ def restart_kernel(self, kernel_id: str, now: bool = False) -> None: | |
In all cases the kernel is restarted, the only difference is whether | ||
it is given a chance to perform a clean shutdown or not. | ||
""" | ||
kernel = self.get_kernel(kernel_id) | ||
if self._using_pending_kernels(): | ||
if not kernel.ready.done(): | ||
raise RuntimeError("Kernel is in a pending state. Cannot restart.") | ||
out = await ensure_async(kernel.restart_kernel(now=now)) | ||
self.log.info("Kernel restarted: %s" % kernel_id) | ||
return out | ||
|
||
restart_kernel = run_sync(_async_restart_kernel) | ||
|
||
@kernel_method | ||
def is_alive(self, kernel_id: str) -> bool: | ||
|
@@ -475,5 +548,6 @@ class AsyncMultiKernelManager(MultiKernelManager): | |
).tag(config=True) | ||
|
||
start_kernel = MultiKernelManager._async_start_kernel | ||
restart_kernel = MultiKernelManager._async_restart_kernel | ||
shutdown_kernel = MultiKernelManager._async_shutdown_kernel | ||
shutdown_all = MultiKernelManager._async_shutdown_all |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.