Skip to content

Commit

Permalink
config
Browse files: browse the repository at this point in the history
Signed-off-by: Cody Yu <[email protected]>
  • Loading branch information
comaniac committed Dec 16, 2024
1 parent f0b4e99 commit a9516ba
Showing 1 changed file with 15 additions and 9 deletions.
24 changes: 15 additions & 9 deletions vllm/engine/arg_utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -203,10 +203,6 @@ def __post_init__(self):
if not self.tokenizer:
self.tokenizer = self.model

# Override the default value of enable_prefix_caching if it's not set
# by user.
if self.enable_prefix_caching is None:
self.enable_prefix_caching = bool(envs.VLLM_USE_V1)
# Override max_num_seqs if it's not set by user.
if self.max_num_seqs is None:
self.max_num_seqs = 256 if not envs.VLLM_USE_V1 else 1024
Expand Down Expand Up @@ -1027,11 +1023,21 @@ def create_engine_config(self,
device_config = DeviceConfig(device=self.device)
model_config = self.create_model_config()

if model_config.is_multimodal_model and not envs.VLLM_USE_V1:
if self.enable_prefix_caching:
logger.warning("--enable-prefix-caching is currently not "
"supported for multimodal models in v0 and "
"has been disabled.")
# Configure prefix caching
if self.enable_prefix_caching is None:
if not envs.VLLM_USE_V1:
# V0: default off.
self.enable_prefix_caching = False
else:
# V1: default on for non-multimodal models.
self.enable_prefix_caching = (
not model_config.is_multimodal_model)
elif (self.enable_prefix_caching and model_config.is_multimodal_model
and not envs.VLLM_USE_V1):
# Force disable prefix caching for multimodal models in V0.
logger.warning("--enable-prefix-caching is currently not "
"supported for multimodal models in v0 and "
"has been disabled.")
self.enable_prefix_caching = False

cache_config = CacheConfig(
Expand Down

0 comments on commit a9516ba

Please sign in to comment.