
Commit aa5e0d9
Fixed incompatibility issue for PyTorch>=2.3.0 (#193)
Co-authored-by: Jerome Anand <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
3 people authored Jun 11, 2024
1 parent 860189d commit aa5e0d9
Showing 4 changed files with 25 additions and 7 deletions.
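Editor's note (inferred from the diff below, not part of the commit message): PyTorch 2.3.0 changed the private helpers _object_to_tensor and _tensor_to_object in torch.distributed.distributed_c10d to take the process group as an extra argument, which broke the vendored _hpu_broadcast_object_list in both strategy files. The commit introduces a _TORCH_LESSER_2_3_0 flag and gates each helper call on it. A minimal sketch of that pattern, assuming only the helper signatures the diff shows (the wrapper name _object_to_tensor_compat is hypothetical):

import torch
from torch.distributed.distributed_c10d import _object_to_tensor

from lightning_habana.utils.imports import _TORCH_LESSER_2_3_0  # flag added by this commit


def _object_to_tensor_compat(obj, device, group=None):
    # Serialize obj with whichever signature the installed torch expects.
    if _TORCH_LESSER_2_3_0:
        # torch < 2.3.0: the private helper takes (obj, device).
        return _object_to_tensor(obj, device)
    # torch >= 2.3.0: the helper also takes the process group.
    return _object_to_tensor(obj, device, group)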
3 changes: 2 additions & 1 deletion CHANGELOG.md
@@ -20,7 +20,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - Fixed deepspeed documentation & tests based on synapse AI release 1.15.1 and latest PTL fabric. ([#184](https://github.com/Lightning-AI/lightning-Habana/pull/184))
 - Workaround to resolve label name issue in HPUProfiler with torch.compile. ([#185](https://github.com/Lightning-AI/lightning-Habana/pull/185))
-
+- Fixed incompatibility issue for PyTorch>=2.3.0 ([#193](https://github.com/Lightning-AI/lightning-Habana/pull/193))
+-
 ### Removed
 
 -
14 changes: 11 additions & 3 deletions src/lightning_habana/pytorch/strategies/ddp.py
@@ -49,7 +49,7 @@
 from lightning_habana.pytorch.plugins.io_plugin import HPUCheckpointIO
 from lightning_habana.pytorch.strategies.parallel import HPUParallelStrategy
 from lightning_habana.utils.hpu_distributed import _sync_ddp_if_available
-from lightning_habana.utils.imports import _HABANA_FRAMEWORK_AVAILABLE
+from lightning_habana.utils.imports import _HABANA_FRAMEWORK_AVAILABLE, _TORCH_LESSER_2_3_0
 
 if _HABANA_FRAMEWORK_AVAILABLE:
     import habana_frameworks.torch.core as htcore
@@ -196,7 +196,12 @@ def _hpu_broadcast_object_list(object_list, src=0, group=None, device=None): #
     my_rank = get_rank()
     # Serialize object_list elements to tensors on src rank.
     if my_rank == src:
-        tensor_list, size_list = zip(*[_object_to_tensor(obj, device) for obj in object_list])
+        tensor_list = []
+        size_list = []
+        if _TORCH_LESSER_2_3_0:
+            tensor_list, size_list = zip(*[_object_to_tensor(obj, device) for obj in object_list])
+        else:
+            tensor_list, size_list = zip(*[_object_to_tensor(obj, device, group) for obj in object_list])
         object_sizes_tensor = torch.cat(size_list)
     else:
         object_sizes_tensor = torch.empty(len(object_list), dtype=torch.long)
@@ -258,4 +263,7 @@ def _hpu_broadcast_object_list(object_list, src=0, group=None, device=None): #
         if obj_view.device != torch.device("cpu"):
             obj_view = obj_view.cpu()
         offset += obj_size
-        object_list[i] = _tensor_to_object(obj_view, obj_size)
+        if _TORCH_LESSER_2_3_0:
+            object_list[i] = _tensor_to_object(obj_view, obj_size)
+        else:
+            object_list[i] = _tensor_to_object(obj_view, obj_size, group)
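Illustrative round trip (editorial, not part of the commit): the deserialization gate above mirrors the serialization gate, so one object survives the pair on either torch version. A hedged single-process sketch; it assumes the helpers tolerate group=None outside a debug run:

import torch
from torch.distributed.distributed_c10d import _object_to_tensor, _tensor_to_object

from lightning_habana.utils.imports import _TORCH_LESSER_2_3_0

obj = {"epoch": 3, "name": "checkpoint"}  # any picklable payload
device = torch.device("cpu")
group = None  # default process group; only forwarded on torch >= 2.3.0

if _TORCH_LESSER_2_3_0:
    tensor, size = _object_to_tensor(obj, device)       # size is a 1-element LongTensor
    restored = _tensor_to_object(tensor, int(size))
else:
    tensor, size = _object_to_tensor(obj, device, group)
    restored = _tensor_to_object(tensor, int(size), group)

assert restored == obj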
14 changes: 11 additions & 3 deletions src/lightning_habana/pytorch/strategies/parallel.py
@@ -48,7 +48,7 @@
 
 from lightning_habana.pytorch.plugins.io_plugin import HPUCheckpointIO
 from lightning_habana.utils.hpu_distributed import _sync_ddp_if_available
-from lightning_habana.utils.imports import _HABANA_FRAMEWORK_AVAILABLE
+from lightning_habana.utils.imports import _HABANA_FRAMEWORK_AVAILABLE, _TORCH_LESSER_2_3_0
 
 if _HABANA_FRAMEWORK_AVAILABLE:
     import habana_frameworks.torch.core as htcore
@@ -191,7 +191,12 @@ def _hpu_broadcast_object_list(object_list, src=0, group=None, device=None): #
     my_rank = get_rank()
     # Serialize object_list elements to tensors on src rank.
     if my_rank == src:
-        tensor_list, size_list = zip(*[_object_to_tensor(obj, device) for obj in object_list])
+        tensor_list = []
+        size_list = []
+        if _TORCH_LESSER_2_3_0:
+            tensor_list, size_list = zip(*[_object_to_tensor(obj, device) for obj in object_list])
+        else:
+            tensor_list, size_list = zip(*[_object_to_tensor(obj, device, group) for obj in object_list])
         object_sizes_tensor = torch.cat(size_list)
     else:
         object_sizes_tensor = torch.empty(len(object_list), dtype=torch.long)
@@ -253,4 +258,7 @@ def _hpu_broadcast_object_list(object_list, src=0, group=None, device=None): #
         if obj_view.device != torch.device("cpu"):
             obj_view = obj_view.cpu()
         offset += obj_size
-        object_list[i] = _tensor_to_object(obj_view, obj_size)
+        if _TORCH_LESSER_2_3_0:
+            object_list[i] = _tensor_to_object(obj_view, obj_size)
+        else:
+            object_list[i] = _tensor_to_object(obj_view, obj_size, group)
1 change: 1 addition & 0 deletions src/lightning_habana/utils/imports.py
@@ -24,6 +24,7 @@
 _HPU_SYNAPSE_GREATER_EQUAL_1_14_0 = Version(get_hpu_synapse_version()) >= Version("1.14.0")
 _TORCH_LESSER_EQUAL_1_13_1 = compare_version("torch", operator.le, "1.13.1")
 _TORCH_GREATER_EQUAL_2_0_0 = compare_version("torch", operator.ge, "2.0.0")
+_TORCH_LESSER_2_3_0 = Version(Version(torch.__version__).base_version) < Version("2.3.0")
 _LIGHTNING_GREATER_EQUAL_2_0_0 = compare_version("lightning", operator.ge, "2.0.0") or compare_version(
     "pytorch_lightning", operator.ge, "2.0.0"
 )
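A note on the flag's construction (editorial): Habana builds of PyTorch typically report versions with pre-release and local suffixes, and under PEP 440 a pre-release such as 2.3.0a0 sorts before 2.3.0, so comparing torch.__version__ directly would route such builds to the old code path. Taking base_version first strips those suffixes. A small illustration with an assumed build string:

from packaging.version import Version

# Hypothetical Habana-style build string; base_version drops the
# pre-release ("a0") and local ("+git...") segments.
v = Version("2.3.0a0+git7a1b2c3")
print(v.base_version)                              # 2.3.0
print(Version(v.base_version) < Version("2.3.0"))  # False, so the flag treats it as >= 2.3.0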
