diff --git a/dali/python/backend_impl.cc b/dali/python/backend_impl.cc index b190a09a2dc..212f7cb08ba 100644 --- a/dali/python/backend_impl.cc +++ b/dali/python/backend_impl.cc @@ -718,6 +718,11 @@ void ExposeTensor(py::module &m) { Returns a `TensorGPU` object being a copy of this `TensorCPU`. )code", py::return_value_policy::take_ownership) + .def("as_cpu", [](Tensor &t) -> Tensor& { + return t; + }, + R"code(Bypass, since the object is already an instance of `TensorCPU`.)code", + py::return_value_policy::reference_internal) .def("copy_to_external", [](Tensor &t, py::object p) { CopyToExternal(ctypes_void_ptr(p), t, AccessOrder::host(), false); @@ -1174,6 +1179,10 @@ void ExposeTensorList(py::module &m) { Returns a `TensorListGPU` object being a copy of this `TensorListCPU`. )code", py::return_value_policy::take_ownership) + .def("as_cpu", [](TensorList &t) -> TensorList& { + return t; + }, R"code(No-op, as it is already an instance of `TensorListCPU`.)code", + py::return_value_policy::reference_internal) .def("layout", [](TensorList &t) { return t.GetLayout().str(); }) diff --git a/dali/python/nvidia/dali/tensors.pyi b/dali/python/nvidia/dali/tensors.pyi index 4bfbe7da984..97aadc8d3f0 100644 --- a/dali/python/nvidia/dali/tensors.pyi +++ b/dali/python/nvidia/dali/tensors.pyi @@ -27,6 +27,7 @@ class TensorCPU: @overload def __init__(self, b, layout: str = ..., is_pinned: bool = ...) -> None: ... def _as_gpu(self, *args, **kwargs) -> Any: ... + def as_cpu(self) -> TensorCPU: ... # def __dlpack__(self, stream: Optional[int] = None) -> capsule: ... @@ -92,7 +93,8 @@ class TensorListCPU: def __init__(self, list_of_tensors: list, layout: str = ...) -> None: ... # def _as_gpu(self, *args, **kwargs) -> TensorListGPU: ... - + def as_cpu(self) -> TensorListCPU: ... + # def as_array(self) -> numpy.ndarray: ... def as_array(self) -> Any: ... def as_reshaped_tensor(self, arg0: List[int]) -> TensorCPU: ... 
diff --git a/dali/test/python/auto_aug/test_augmentations.py b/dali/test/python/auto_aug/test_augmentations.py index 8499ad30296..1881d87e1b4 100644 --- a/dali/test/python/auto_aug/test_augmentations.py +++ b/dali/test/python/auto_aug/test_augmentations.py @@ -19,7 +19,6 @@ from PIL import Image, ImageEnhance, ImageOps from nose2.tools import params, cartesian_params -import nvidia.dali.tensors as _tensors from nvidia.dali import fn, pipeline_def from nvidia.dali.auto_aug import augmentations as a from nvidia.dali.auto_aug.core._utils import get_translations as _get_translations @@ -91,8 +90,7 @@ def pipeline(): if dev == "gpu": output = output.as_cpu() output = [np.array(sample) for sample in output] - if isinstance(data, _tensors.TensorListGPU): - data = data.as_cpu() + data = data.as_cpu() data = [np.array(sample) for sample in data] if modality == "image": diff --git a/dali/test/python/auto_aug/test_rand_augment.py b/dali/test/python/auto_aug/test_rand_augment.py index 514abbec350..7df7dec7b68 100644 --- a/dali/test/python/auto_aug/test_rand_augment.py +++ b/dali/test/python/auto_aug/test_rand_augment.py @@ -21,7 +21,7 @@ from scipy.stats import chisquare from nose2.tools import params -from nvidia.dali import fn, tensors, types +from nvidia.dali import fn, types from nvidia.dali import pipeline_def from nvidia.dali.auto_aug import rand_augment from nvidia.dali.auto_aug.core import augmentation @@ -43,8 +43,7 @@ def debug_discrepancy_helper(*batch_pairs): """ def as_array_list(batch): - if isinstance(batch, tensors.TensorListGPU): - batch = batch.as_cpu() + batch = batch.as_cpu() return [np.array(sample) for sample in batch] batch_names = [name for _, _, name in batch_pairs] diff --git a/dali/test/python/checkpointing/test_dali_stateless_operators.py b/dali/test/python/checkpointing/test_dali_stateless_operators.py index dccba51af8c..e9ac14504ad 100644 --- a/dali/test/python/checkpointing/test_dali_stateless_operators.py +++ 
b/dali/test/python/checkpointing/test_dali_stateless_operators.py @@ -16,7 +16,6 @@ import glob import numpy as np import itertools -import nvidia.dali as dali from nvidia.dali import fn, pipeline_def, types from test_utils import ( compare_pipelines, @@ -41,9 +40,7 @@ def tensor_list_to_array(tensor_list): - if isinstance(tensor_list, dali.backend_impl.TensorListGPU): - tensor_list = tensor_list.as_cpu() - return tensor_list.as_array() + return tensor_list.as_cpu().as_array() # Check whether a given pipeline is stateless diff --git a/dali/test/python/decoder/test_video.py b/dali/test/python/decoder/test_video.py index d12bd740429..d71614b3776 100644 --- a/dali/test/python/decoder/test_video.py +++ b/dali/test/python/decoder/test_video.py @@ -21,7 +21,6 @@ import os from itertools import cycle from test_utils import get_dali_extra_path, is_mulit_gpu, skip_if_m60 -from nvidia.dali.backend import TensorListGPU from nose2.tools import params from nose_utils import SkipTest, attr, assert_raises @@ -77,8 +76,7 @@ def video_decoder_iter(batch_size, epochs=1, device="cpu", module=fn.experimenta pipe.build() for _ in range(int((epochs * len(files) + batch_size - 1) / batch_size)): (output,) = pipe.run() - if isinstance(output, TensorListGPU): - output = output.as_cpu() + output = output.as_cpu() for i in range(batch_size): yield np.array(output[i]) @@ -89,8 +87,7 @@ def ref_iter(epochs=1, device="cpu"): pipe = reference_pipeline(filename, device=device) pipe.build() (output,) = pipe.run() - if isinstance(output, TensorListGPU): - output = output.as_cpu() + output = output.as_cpu() yield np.array(output[0]) diff --git a/dali/test/python/operator_1/test_arithmetic_ops.py b/dali/test/python/operator_1/test_arithmetic_ops.py index 8f7eb46093d..c6e0d045969 100644 --- a/dali/test/python/operator_1/test_arithmetic_ops.py +++ b/dali/test/python/operator_1/test_arithmetic_ops.py @@ -17,7 +17,6 @@ import nvidia.dali.ops as ops import nvidia.dali.types as types import 
nvidia.dali.math as math -from nvidia.dali.tensors import TensorListGPU import numpy as np from nose_utils import attr, raises, assert_raises, assert_equals from nose2.tools import params @@ -226,12 +225,6 @@ def default_range(*types): ] -def as_cpu(tl): - if isinstance(tl, TensorListGPU): - return tl.as_cpu() - return tl - - def max_dtype(kind, left_dtype, right_dtype): return np.dtype(kind + str(max(left_dtype.itemsize, right_dtype.itemsize))) @@ -449,8 +442,8 @@ def get_numpy_input(input, kind, orig_type, target_type): def extract_un_data(pipe_out, sample_id, kind, target_type): - input = as_cpu(pipe_out[0]).at(sample_id) - out = as_cpu(pipe_out[1]).at(sample_id) + input = pipe_out[0].at(sample_id).as_cpu() + out = pipe_out[1].at(sample_id).as_cpu() assert_equals(out.dtype, target_type) in_np = get_numpy_input(input, kind, input.dtype.type, target_type) return in_np, out @@ -465,7 +458,7 @@ def extract_data(pipe_out, sample_id, kinds, target_type): arity = len(kinds) inputs = [] for i in range(arity): - dali_in = as_cpu(pipe_out[i]).at(sample_id) + dali_in = pipe_out[i].at(sample_id).as_cpu() numpy_in = get_numpy_input( dali_in, kinds[i], @@ -473,7 +466,7 @@ def extract_data(pipe_out, sample_id, kinds, target_type): target_type if target_type is not None else dali_in.dtype.type, ) inputs.append(numpy_in) - out = as_cpu(pipe_out[arity]).at(sample_id) + out = pipe_out[arity].at(sample_id).as_cpu() return tuple(inputs) + (out,) diff --git a/dali/test/python/operator_1/test_batch_permute.py b/dali/test/python/operator_1/test_batch_permute.py index a388da06154..8db89eb33e2 100644 --- a/dali/test/python/operator_1/test_batch_permute.py +++ b/dali/test/python/operator_1/test_batch_permute.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import nvidia.dali as dali import nvidia.dali.fn as fn from nvidia.dali.pipeline import Pipeline import numpy as np @@ -69,8 +68,7 @@ def _test_permute_batch(device, type): for i in range(10): orig, permuted, idxs = pipe.run() idxs = [int(idxs.at(i)) for i in range(batch_size)] - if isinstance(orig, dali.backend.TensorListGPU): - orig = orig.as_cpu() + orig = orig.as_cpu() ref = [orig.at(idx) for idx in idxs] check_batch(permuted, ref, len(ref), 0, 0, "abc") @@ -91,10 +89,9 @@ def _test_permute_batch_fixed(device): pipe.set_outputs(data, fn.permute_batch(data, indices=idxs)) pipe.build() - for i in range(10): + for _ in range(10): orig, permuted = pipe.run() - if isinstance(orig, dali.backend.TensorListGPU): - orig = orig.as_cpu() + orig = orig.as_cpu() ref = [orig.at(idx) for idx in idxs] check_batch(permuted, ref, len(ref), 0, 0, "abc") diff --git a/dali/test/python/operator_1/test_coin_flip.py b/dali/test/python/operator_1/test_coin_flip.py index f5820ec6d2e..d257f67a681 100644 --- a/dali/test/python/operator_1/test_coin_flip.py +++ b/dali/test/python/operator_1/test_coin_flip.py @@ -14,7 +14,6 @@ import numpy as np import nvidia.dali as dali -from nvidia.dali.backend_impl import TensorListGPU from nvidia.dali.pipeline import Pipeline @@ -53,10 +52,10 @@ def shape_gen_f(): pipe.set_outputs(*outputs) pipe.build() outputs = pipe.run() - data_out = outputs[0].as_cpu() if isinstance(outputs[0], TensorListGPU) else outputs[0] + data_out = outputs[0].as_cpu() shapes_out = None if max_shape is not None: - shapes_out = outputs[1].as_cpu() if isinstance(outputs[1], TensorListGPU) else outputs[1] + shapes_out = outputs[1].as_cpu() p = p if p is not None else 0.5 for i in range(batch_size): data = np.array(data_out[i]) diff --git a/dali/test/python/operator_1/test_coord_flip.py b/dali/test/python/operator_1/test_coord_flip.py index 45344f9fd1a..b409a6891ad 100644 --- a/dali/test/python/operator_1/test_coord_flip.py +++ b/dali/test/python/operator_1/test_coord_flip.py @@ 
-70,7 +70,7 @@ def check_operator_coord_flip(device, batch_size, layout, shape, center_x, cente for sample in range(batch_size): in_coords = outputs[0].at(sample) if device == "gpu": - out_coords = outputs[1].as_cpu().at(sample) + out_coords = outputs[1].at(sample).as_cpu() else: out_coords = outputs[1].at(sample) if in_coords.shape == () or in_coords.shape[0] == 0: diff --git a/dali/test/python/operator_1/test_crop.py b/dali/test/python/operator_1/test_crop.py index 0832dce4384..bac1f2becff 100644 --- a/dali/test/python/operator_1/test_crop.py +++ b/dali/test/python/operator_1/test_crop.py @@ -16,7 +16,6 @@ from nvidia.dali import pipeline_def, fn import nvidia.dali.ops as ops import nvidia.dali.types as types -import nvidia.dali as dali import numpy as np import os from nose_utils import assert_raises @@ -580,15 +579,10 @@ def check_crop_with_out_of_bounds_policy_support( if fill_values is None: fill_values = 0 pipe.build() - for k in range(3): + for _ in range(3): outs = pipe.run() - out = outs[0] - in_data = outs[1] - if isinstance(out, dali.backend_impl.TensorListGPU): - out = out.as_cpu() - if isinstance(in_data, dali.backend_impl.TensorListGPU): - in_data = in_data.as_cpu() - + out = outs[0].as_cpu() + in_data = outs[1].as_cpu() assert batch_size == len(out) for idx in range(batch_size): sample_in = in_data.at(idx) diff --git a/dali/test/python/operator_1/test_crop_mirror_normalize.py b/dali/test/python/operator_1/test_crop_mirror_normalize.py index 737216ec7b6..23e5af2018a 100644 --- a/dali/test/python/operator_1/test_crop_mirror_normalize.py +++ b/dali/test/python/operator_1/test_crop_mirror_normalize.py @@ -13,7 +13,6 @@ # limitations under the License. 
import numpy as np -import nvidia.dali as dali import nvidia.dali.fn as fn import nvidia.dali.ops as ops import nvidia.dali.types as types @@ -773,10 +772,8 @@ def check_cmn_with_out_of_bounds_policy_support( out = outs[0] in_data = outs[1] mirror_data = outs[2] - if isinstance(out, dali.backend_impl.TensorListGPU): - out = out.as_cpu() - if isinstance(in_data, dali.backend_impl.TensorListGPU): - in_data = in_data.as_cpu() + out = out.as_cpu() + in_data = in_data.as_cpu() assert batch_size == len(out) for idx in range(batch_size): @@ -907,14 +904,7 @@ def pipe(): for _ in range(3): outs = p.run() for s in range(batch_size): - out, image_like, mean, std = [ - ( - np.array(o[s].as_cpu()) - if isinstance(o, dali.backend_impl.TensorListGPU) - else np.array(o[s]) - ) - for o in outs - ] + out, image_like, mean, std = [np.array(o[s].as_cpu()) for o in outs] ref_scale = scale or 1.0 ref_shift = shift or 0.0 ref_out = ref_scale * (image_like - mean) / std + ref_shift diff --git a/dali/test/python/operator_1/test_input_promotion.py b/dali/test/python/operator_1/test_input_promotion.py index d0acb29d64a..ed252988d1c 100644 --- a/dali/test/python/operator_1/test_input_promotion.py +++ b/dali/test/python/operator_1/test_input_promotion.py @@ -46,7 +46,7 @@ def test_slice_fn(): pipe.build() o = pipe.run() assert np.array_equal(o[0].at(0), np.array([[14], [17]])) - assert np.array_equal(o[1].as_cpu().at(0), np.array([[14], [17]])) + assert np.array_equal(o[1].at(0).as_cpu(), np.array([[14], [17]])) def test_slice_ops(): @@ -62,7 +62,7 @@ def test_slice_ops(): pipe.build() o = pipe.run() assert np.array_equal(o[0].at(0), np.array([[14], [17]])) - assert np.array_equal(o[1].as_cpu().at(0), np.array([[14], [17]])) + assert np.array_equal(o[1].at(0).as_cpu(), np.array([[14], [17]])) def test_python_function(): diff --git a/dali/test/python/operator_1/test_normal_distribution.py b/dali/test/python/operator_1/test_normal_distribution.py index c40084b6656..d24ea511486 100644 --- 
a/dali/test/python/operator_1/test_normal_distribution.py +++ b/dali/test/python/operator_1/test_normal_distribution.py @@ -13,7 +13,6 @@ # limitations under the License. from nvidia.dali.pipeline import Pipeline -from nvidia.dali.backend_impl import TensorListGPU import nvidia.dali.fn as fn import nvidia.dali.types as types import numpy as np @@ -107,10 +106,7 @@ def shape_gen_f(): pipe.build() for i in range(niter): outputs = pipe.run() - out, shapes, means, stddevs = tuple( - outputs[i].as_cpu() if isinstance(outputs[i], TensorListGPU) else outputs[i] - for i in range(len(outputs)) - ) + out, shapes, means, stddevs = tuple(outputs[i].as_cpu() for i in range(len(outputs))) for sample_idx in range(batch_size): sample = np.array(out[sample_idx]) if sample.shape == (): diff --git a/dali/test/python/operator_1/test_normalize.py b/dali/test/python/operator_1/test_normalize.py index 13db1283560..76893df7e68 100644 --- a/dali/test/python/operator_1/test_normalize.py +++ b/dali/test/python/operator_1/test_normalize.py @@ -13,7 +13,6 @@ # limitations under the License. from nvidia.dali.pipeline import Pipeline -from nvidia.dali import backend import nvidia.dali.ops as ops import numpy as np from test_utils import dali_type @@ -395,8 +394,7 @@ def iter_setup(self): def to_list(tensor_list): - if isinstance(tensor_list, backend.TensorListGPU): - tensor_list = tensor_list.as_cpu() + tensor_list = tensor_list.as_cpu() out = [] for i in range(len(tensor_list)): out.append(tensor_list.at(i)) diff --git a/dali/test/python/operator_1/test_pad.py b/dali/test/python/operator_1/test_pad.py index a629a9fdc5d..95b24910de7 100644 --- a/dali/test/python/operator_1/test_pad.py +++ b/dali/test/python/operator_1/test_pad.py @@ -13,13 +13,11 @@ # limitations under the License. 
import numpy as np -import nvidia.dali as dali import nvidia.dali.fn as fn import nvidia.dali.math as math import nvidia.dali.ops as ops import nvidia.dali.types as types from nvidia.dali import Pipeline, pipeline_def -from nvidia.dali.backend_impl import TensorListGPU from nose_utils import assert_raises from test_utils import ( @@ -104,7 +102,7 @@ def check_pad(device, batch_size, input_max_shape, axes, axis_names, align, shap for k in range(5): out1, out2 = pipe.run() - out1_data = out1.as_cpu() if isinstance(out1[0], dali.backend_impl.TensorGPU) else out1 + out1_data = out1.as_cpu() max_shape = [-1] * len(input_max_shape) for i in range(len(actual_axes)): @@ -116,7 +114,7 @@ def check_pad(device, batch_size, input_max_shape, axes, axis_names, align, shap if input_shape[dim] > max_shape[dim]: max_shape[dim] = input_shape[dim] - out2_data = out2.as_cpu() if isinstance(out2[0], dali.backend_impl.TensorGPU) else out2 + out2_data = out2.as_cpu() for i in range(batch_size): input_shape = out1_data.at(i).shape output_shape = out2_data.at(i).shape @@ -226,7 +224,7 @@ def check_pad_per_sample_shapes_and_alignment(device="cpu", batch_size=3, ndim=2 ) pipe.build() for _ in range(num_iter): - outs = [out.as_cpu() if isinstance(out, TensorListGPU) else out for out in pipe.run()] + outs = [out.as_cpu() for out in pipe.run()] for i in range(batch_size): in_shape, in_data, req_shape, req_align, out_pad_shape, out_pad_align, out_pad_both = [ outs[out_idx].at(i) for out_idx in range(len(outs)) @@ -266,7 +264,7 @@ def check_pad_to_square(device="cpu", batch_size=3, ndim=2, num_iter=3): pipe.set_outputs(in_data, out_data) pipe.build() for _ in range(num_iter): - outs = [out.as_cpu() if isinstance(out, TensorListGPU) else out for out in pipe.run()] + outs = [out.as_cpu() for out in pipe.run()] for i in range(batch_size): in_data, out_data = [outs[out_idx].at(i) for out_idx in range(len(outs))] in_shape = in_data.shape diff --git a/dali/test/python/operator_2/test_python_function.py 
b/dali/test/python/operator_2/test_python_function.py index ae8214c49c0..0b85f6bbca9 100644 --- a/dali/test/python/operator_2/test_python_function.py +++ b/dali/test/python/operator_2/test_python_function.py @@ -320,7 +320,7 @@ def test_python_operator_brightness(): (numpy_output,) = numpy_brightness.run() (dali_output,) = dali_brightness.run() for i in range(len(dali_output)): - assert numpy.allclose(numpy_output.at(i), dali_output.as_cpu().at(i), rtol=1e-5, atol=1) + assert numpy.allclose(numpy_output.at(i), dali_output.at(i).as_cpu(), rtol=1e-5, atol=1) def invalid_function(image): diff --git a/dali/test/python/operator_2/test_remap.py b/dali/test/python/operator_2/test_remap.py index 4afd6695fa8..80cd39d4096 100644 --- a/dali/test/python/operator_2/test_remap.py +++ b/dali/test/python/operator_2/test_remap.py @@ -14,7 +14,6 @@ import cv2 import numpy as np -import nvidia.dali as dali import nvidia.dali.fn as fn import os.path import unittest @@ -176,16 +175,8 @@ def _compare_pipelines_pixelwise(self, pipe1, pipe2, N_iterations, eps=0.01): f"Numbers of outputs in the pipelines does not match: {len(out1)} vs {len(out2)}.", ) for i in range(len(out1)): - out1_data = ( - out1[i].as_cpu() - if isinstance(out1[i][0], dali.backend_impl.TensorGPU) - else out1[i] - ) - out2_data = ( - out2[i].as_cpu() - if isinstance(out2[i][0], dali.backend_impl.TensorGPU) - else out2[i] - ) + out1_data = out1[i].as_cpu() + out2_data = out2[i].as_cpu() for sample1, sample2 in zip(out1_data, out2_data): s1 = np.array(sample1) s2 = np.array(sample2) diff --git a/dali/test/python/operator_2/test_resize.py b/dali/test/python/operator_2/test_resize.py index ef1217d6aaa..a8c8099f16f 100644 --- a/dali/test/python/operator_2/test_resize.py +++ b/dali/test/python/operator_2/test_resize.py @@ -571,18 +571,14 @@ def get_output(): print("Requested output", size[i]) assert max_err <= eps - ref_in = dali_in - if isinstance(ref_in, dali.tensors.TensorListGPU): - ref_in = ref_in.as_cpu() # suppress 
warnings + ref_in = dali_in.as_cpu() pil_pipe.feed_input("images", ref_in, layout=layout_str(dim, channel_first)) pil_pipe.feed_input("size", dali_out_size) pil_pipe.feed_input("roi_start", roi_start) pil_pipe.feed_input("roi_end", roi_end) ref = pil_pipe.run() - dali_resized = o[1] - if isinstance(dali_resized, dali.tensors.TensorListGPU): - dali_resized = dali_resized.as_cpu() + dali_resized = o[1].as_cpu() ref_resized = ref[0] max_avg_err = 0.6 if dim == 3 else 0.4 @@ -874,7 +870,7 @@ def resize_pipe(): pipe = resize_pipe() pipe.build() (outs,) = pipe.run() - out = outs.as_cpu().at(0) + out = outs.at(0).as_cpu() global large_data_resized if large_data_resized is None: large_data_resized = make_cube(350, 224, 224) diff --git a/dali/test/python/operator_2/test_resize_seq.py b/dali/test/python/operator_2/test_resize_seq.py index 551ef5867c2..81b20368000 100644 --- a/dali/test/python/operator_2/test_resize_seq.py +++ b/dali/test/python/operator_2/test_resize_seq.py @@ -42,7 +42,7 @@ def init_video_data(): video_pipe.build() out = video_pipe.run() - in_seq = out[0].as_cpu().at(0) + in_seq = out[0].at(0).as_cpu() return in_seq diff --git a/dali/test/python/operator_2/test_subscript.py b/dali/test/python/operator_2/test_subscript.py index b1b74b05f88..6a0619465c3 100644 --- a/dali/test/python/operator_2/test_subscript.py +++ b/dali/test/python/operator_2/test_subscript.py @@ -38,7 +38,7 @@ def test_plain_indexing(): for i in range(len(inp)): x = inp.at(i) assert np.array_equal(x[1, 1], cpu.at(i)) - assert np.array_equal(x[1, 1], gpu.as_cpu().at(i)) + assert np.array_equal(x[1, 1], gpu.at(i).as_cpu()) def _test_indexing(data_gen, input_layout, output_layout, dali_index_func, ref_index_func=None): @@ -50,7 +50,7 @@ def _test_indexing(data_gen, input_layout, output_layout, dali_index_func, ref_i x = inp.at(i) ref = (ref_index_func or dali_index_func)(x) assert np.array_equal(ref, cpu.at(i)) - assert np.array_equal(ref, gpu.as_cpu().at(i)) + assert np.array_equal(ref, 
gpu.at(i).as_cpu()) assert cpu.layout() == output_layout assert gpu.layout() == output_layout @@ -94,7 +94,7 @@ def test_swapped_ends(): for i in range(len(inp)): x = inp.at(i) assert np.array_equal(x[2:1], cpu.at(i)) - assert np.array_equal(x[2:1], gpu.as_cpu().at(i)) + assert np.array_equal(x[2:1], gpu.at(i).as_cpu()) def test_noop(): @@ -129,7 +129,7 @@ def data_gen(): j = (j + 1) % len(lo_idxs) k = (k + 1) % len(hi_idxs) assert np.array_equal(ref, cpu.at(i)) - assert np.array_equal(ref, gpu.as_cpu().at(i)) + assert np.array_equal(ref, gpu.at(i).as_cpu()) def test_runtime_stride_dim1(): @@ -154,7 +154,7 @@ def data_gen(): ref = x[::strides[j]] # fmt: on assert np.array_equal(ref, cpu.at(i)) - assert np.array_equal(ref, gpu.as_cpu().at(i)) + assert np.array_equal(ref, gpu.at(i).as_cpu()) j = (j + 1) % len(strides) @@ -180,7 +180,7 @@ def data_gen(): ref = x[:, ::strides[j]] # fmt: on assert np.array_equal(ref, cpu.at(i)) - assert np.array_equal(ref, gpu.as_cpu().at(i)) + assert np.array_equal(ref, gpu.at(i).as_cpu()) j = (j + 1) % len(strides) @@ -304,7 +304,7 @@ def test_multiple_skipped_dims(): for i in range(len(inp)): x = inp.at(i) assert np.array_equal(x[1, :, :, 1], cpu.at(i)) - assert np.array_equal(x[1, :, :, 1], gpu.as_cpu().at(i)) + assert np.array_equal(x[1, :, :, 1], gpu.at(i).as_cpu()) def test_empty_slice(): @@ -316,4 +316,4 @@ def test_empty_slice(): for i in range(len(inp)): x = inp.at(i) assert np.array_equal(x[0:0, 0:1], cpu.at(i)) - assert np.array_equal(x[0:0, 0:1], gpu.as_cpu().at(i)) + assert np.array_equal(x[0:0, 0:1], gpu.at(i).as_cpu()) diff --git a/dali/test/python/operator_2/test_uniform.py b/dali/test/python/operator_2/test_uniform.py index 20c18ab04e5..906bd477dde 100644 --- a/dali/test/python/operator_2/test_uniform.py +++ b/dali/test/python/operator_2/test_uniform.py @@ -14,7 +14,6 @@ import nvidia.dali as dali from nvidia.dali.pipeline import Pipeline -from nvidia.dali.backend_impl import TensorListGPU import numpy as np import 
scipy.stats as st @@ -27,7 +26,7 @@ def check_uniform_default(device="cpu", batch_size=32, shape=[1e5], val_range=No for it in range(niter): outputs = pipe.run() val_range = (-1.0, 1.0) if val_range is None else val_range - data_out = outputs[0].as_cpu() if isinstance(outputs[0], TensorListGPU) else outputs[0] + data_out = outputs[0].as_cpu() pvs = [] for i in range(batch_size): data = np.array(data_out[i]) @@ -68,7 +67,7 @@ def check_uniform_continuous_next_after(device="cpu", batch_size=32, shape=[1e5] pipe.build() for it in range(niter): outputs = pipe.run() - data_out = outputs[0].as_cpu() if isinstance(outputs[0], TensorListGPU) else outputs[0] + data_out = outputs[0].as_cpu() for i in range(batch_size): data = np.array(data_out[i]) assert (val_range[0] == data).all(), f"{data} is outside of requested range" @@ -89,7 +88,7 @@ def check_uniform_discrete(device="cpu", batch_size=32, shape=[1e5], values=None pipe.build() for it in range(niter): outputs = pipe.run() - data_out = outputs[0].as_cpu() if isinstance(outputs[0], TensorListGPU) else outputs[0] + data_out = outputs[0].as_cpu() values_set = set(values) maxval = np.max(values) bins = np.concatenate([values, np.array([np.nextafter(maxval, maxval + 1)])]) diff --git a/dali/test/python/operator_2/test_warp.py b/dali/test/python/operator_2/test_warp.py index 9355f4a8e52..2f40922880a 100644 --- a/dali/test/python/operator_2/test_warp.py +++ b/dali/test/python/operator_2/test_warp.py @@ -299,7 +299,7 @@ def get_data(): if device == "cpu": out = out.at(0) else: - out = out.as_cpu().at(0) + out = out.at(0).as_cpu() assert out.shape == (out_size, out_size, channels) for c in range(channels): assert out[0, 0, c] == c diff --git a/dali/test/python/sequences_test_utils.py b/dali/test/python/sequences_test_utils.py index 7074ae5e386..41d9567b985 100644 --- a/dali/test/python/sequences_test_utils.py +++ b/dali/test/python/sequences_test_utils.py @@ -21,7 +21,6 @@ from nvidia.dali import pipeline_def import 
nvidia.dali.fn as fn from nvidia.dali import types -import nvidia.dali.tensors as _Tensors from test_utils import get_dali_extra_path, check_batch @@ -185,8 +184,7 @@ def arg_data_node(arg_data: ArgData): def as_batch(tensor): - if isinstance(tensor, _Tensors.TensorListGPU): - tensor = tensor.as_cpu() + tensor = tensor.as_cpu() return [np.array(sample, dtype=types.to_numpy_type(sample.dtype)) for sample in tensor] diff --git a/dali/test/python/test_backend_impl_torch_dlpack.py b/dali/test/python/test_backend_impl_torch_dlpack.py index 6e918e09ee4..4fa8ab94d6d 100644 --- a/dali/test/python/test_backend_impl_torch_dlpack.py +++ b/dali/test/python/test_backend_impl_torch_dlpack.py @@ -23,10 +23,7 @@ def convert_to_torch(tensor, device="cuda", dtype=None, size=None): if size is None: - if isinstance(tensor, TensorListCPU) or isinstance(tensor, TensorListGPU): - t = tensor.as_tensor() - else: - t = tensor + t = tensor.as_tensor() size = t.shape() dali_torch_tensor = torch.empty(size=size, device=device, dtype=dtype) c_type_pointer = ctypes.c_void_p(dali_torch_tensor.data_ptr()) diff --git a/dali/test/python/test_optical_flow.py b/dali/test/python/test_optical_flow.py index 61b57da3aa2..7a7cea8cf93 100644 --- a/dali/test/python/test_optical_flow.py +++ b/dali/test/python/test_optical_flow.py @@ -268,7 +268,7 @@ def check_optflow(output_grid=1, hint_grid=1, use_temporal_hints=False): out = pipe.run() for i in range(batch_size): seq = out[0].at(i) - out_field = out[1].as_cpu().at(i)[0] + out_field = out[1].at(i).as_cpu()[0] _, ref_field = get_mapping(seq.shape[1:3]) dsize = (out_field.shape[1], out_field.shape[0]) ref_field = cv2.resize(ref_field, dsize=dsize, interpolation=cv2.INTER_AREA) diff --git a/dali/test/python/test_pipeline.py b/dali/test/python/test_pipeline.py index a64f481c15d..010c6c0cdb2 100644 --- a/dali/test/python/test_pipeline.py +++ b/dali/test/python/test_pipeline.py @@ -696,7 +696,7 @@ def define_graph(self): borderValue=(128, 128, 128), 
flags=(cv2.WARP_INVERSE_MAP + cv2.INTER_LINEAR), ) - dali_output = pipe_out[2].as_cpu().at(i) + dali_output = pipe_out[2].at(i).as_cpu() maxdif = np.max(cv2.absdiff(out, dali_output) / 255.0) assert maxdif < 0.025 diff --git a/dali/test/python/test_utils.py b/dali/test/python/test_utils.py index a8ce3519a8f..55823c4f087 100644 --- a/dali/test/python/test_utils.py +++ b/dali/test/python/test_utils.py @@ -14,7 +14,7 @@ import nvidia.dali as dali import nvidia.dali.types as dali_types -from nvidia.dali.backend_impl import TensorListGPU, TensorGPU, TensorListCPU +from nvidia.dali.backend_impl import TensorListCPU from nvidia.dali import plugin_manager import functools @@ -275,10 +275,8 @@ def is_error(mean_err, max_err, eps, max_allowed_error): return False import_numpy() - if isinstance(batch1, dali.backend_impl.TensorListGPU): - batch1 = batch1.as_cpu() - if isinstance(batch2, dali.backend_impl.TensorListGPU): - batch2 = batch2.as_cpu() + batch1 = batch1.as_cpu() + batch2 = batch2.as_cpu() if batch_size is None: batch_size = len(batch1) @@ -384,12 +382,8 @@ def compare_pipelines( out2 = pipe2.run() assert len(out1) == len(out2) for i in range(len(out1)): - out1_data = ( - out1[i].as_cpu() if isinstance(out1[i][0], dali.backend_impl.TensorGPU) else out1[i] - ) - out2_data = ( - out2[i].as_cpu() if isinstance(out2[i][0], dali.backend_impl.TensorGPU) else out2[i] - ) + out1_data = out1[i].as_cpu() + out2_data = out2[i].as_cpu() if isinstance(expected_layout, tuple): current_expected_layout = expected_layout[i] else: @@ -541,8 +535,7 @@ def check_output(outputs, ref_out, ref_is_list_of_outputs=None): for idx in range(len(outputs)): out = outputs[idx] ref = ref_out[idx] if ref_is_list_of_outputs else ref_out - if isinstance(out, dali.backend_impl.TensorListGPU): - out = out.as_cpu() + out = out.as_cpu() for i in range(len(out)): if not np.array_equal(out[i], ref[i]): print("Mismatch at sample", i) @@ -722,8 +715,7 @@ def update(self, val, n=1): def to_array(dali_out): 
import_numpy() - if isinstance(dali_out, (TensorGPU, TensorListGPU)): - dali_out = dali_out.as_cpu() + dali_out = dali_out.as_cpu() if isinstance(dali_out, TensorListCPU): dali_out = dali_out.as_array() return np.array(dali_out) @@ -858,7 +850,7 @@ def gen(): def as_array(tensor): import_numpy() - return np.array(tensor.as_cpu() if isinstance(tensor, TensorGPU) else tensor) + return np.array(tensor.as_cpu()) def python_function(*inputs, function, **kwargs):