Skip to content

Commit

Permalink
Disable cudaMallocAsync for post2 release
Browse files Browse the repository at this point in the history
  • Loading branch information
turboderp committed Feb 14, 2024
1 parent 0535783 commit 75f969a
Showing 1 changed file with 10 additions and 8 deletions.
18 changes: 10 additions & 8 deletions exllamav2/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,16 @@
# Set CUDA context to lazy loading since we won't need 95% of the modules in Torch
os.environ["CUDA_MODULE_LOADING"] = "LAZY"

# Set cudaMallocAsync allocator by default as it appears slightly more memory efficient, unless Torch is already
# imported, in which case changing the allocator would cause it to crash.
#
# Torch is detected through sys.modules rather than by probing the bare name `torch`: in the visible part of this
# file that name is only bound by the `import torch` below, so probing it here would always raise NameError and
# the allocator would be switched even when another module had already loaded Torch.
import sys

if "PYTORCH_CUDA_ALLOC_CONF" not in os.environ and "torch" not in sys.modules:
    # TODO: Should maybe be a warning when Torch is already imported and the allocator is left unchanged?
    os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "backend:cudaMallocAsync"
# Disabled for 0.0.13.post2
#
# # Set cudaMallocAsync allocator by default as it appears slightly more memory efficient, unless Torch is already
# # imported in which case changing the allocator would cause it to crash
# if not "PYTORCH_CUDA_ALLOC_CONF" in os.environ:
# try:
# x = torch.__version__
# # TODO: Should maybe be a warning here?
# except NameError:
# os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "backend:cudaMallocAsync"

import torch
import math
Expand Down

0 comments on commit 75f969a

Please sign in to comment.