diff --git a/vllm/v1/engine/core_client.py b/vllm/v1/engine/core_client.py index ff25a9b2e9cac..d56fcbdb1e7c4 100644 --- a/vllm/v1/engine/core_client.py +++ b/vllm/v1/engine/core_client.py @@ -1,5 +1,5 @@ -import atexit import os +import weakref from typing import List, Optional import msgspec @@ -165,15 +165,9 @@ def __init__( ready_path=ready_path, # type: ignore[misc] **kwargs, ) - atexit.register(self.shutdown) + self._finalizer = weakref.finalize(self, self.shutdown) def shutdown(self): - # During final garbage collection in process shutdown, atexit may be - # None. - if atexit: - # in case shutdown gets called via __del__ first - atexit.unregister(self.shutdown) - # Shut down the zmq context. self.ctx.destroy(linger=0) @@ -197,9 +191,6 @@ def shutdown(self): os.remove(socket_file) self.proc_handle = None - def __del__(self): - self.shutdown() - class SyncMPClient(MPClient): """Synchronous client for multi-proc EngineCore.""" diff --git a/vllm/v1/executor/multiproc_executor.py b/vllm/v1/executor/multiproc_executor.py index 17441dacdc5cf..128101aa6956d 100644 --- a/vllm/v1/executor/multiproc_executor.py +++ b/vllm/v1/executor/multiproc_executor.py @@ -1,9 +1,9 @@ -import atexit import os import pickle import signal import sys import time +import weakref from dataclasses import dataclass from enum import Enum, auto from multiprocessing.process import BaseProcess @@ -37,7 +37,7 @@ class MultiprocExecutor(Executor): def __init__(self, vllm_config: VllmConfig) -> None: # Call self.shutdown at exit to clean up # and ensure workers will be terminated. - atexit.register(self.shutdown) + self._finalizer = weakref.finalize(self, self.shutdown) self.vllm_config = vllm_config self.parallel_config = vllm_config.parallel_config @@ -195,14 +195,10 @@ def _cleanup_sockets(self): os.remove(socket_path) def shutdown(self): - if atexit: - # in case shutdown was called explicitly, we don't need to call it - # again - atexit.unregister(self.shutdown) """Properly shut down the executor and its workers""" if getattr(self, 'shutting_down', False): self.shutting_down = True - for w in self.workers: #TODO: not sure if needed + for w in self.workers: w.worker_response_mq = None self._ensure_worker_termination()