From f98babfffd264259c7cf2759705bf3ed3cc57158 Mon Sep 17 00:00:00 2001 From: Joaquin Anton Guirao Date: Thu, 19 Dec 2024 08:46:32 +0100 Subject: [PATCH] Fixes Signed-off-by: Joaquin Anton Guirao --- .../python/operator_1/test_arithmetic_ops.py | 8 ++--- dali/test/python/operator_1/test_coin_flip.py | 3 +- .../test/python/operator_1/test_coord_flip.py | 16 ++++------ dali/test/python/operator_1/test_crop.py | 4 +-- .../operator_1/test_crop_mirror_normalize.py | 14 +++----- .../python/operator_1/test_input_promotion.py | 12 +++---- dali/test/python/operator_1/test_pad.py | 17 ++++------ .../python/operator_2/test_python_function.py | 4 +-- dali/test/python/operator_2/test_remap.py | 8 ++--- dali/test/python/operator_2/test_resize.py | 2 +- .../test/python/operator_2/test_resize_seq.py | 2 +- dali/test/python/operator_2/test_subscript.py | 32 +++++++++---------- dali/test/python/operator_2/test_uniform.py | 15 ++++----- dali/test/python/operator_2/test_warp.py | 7 ++-- dali/test/python/test_optical_flow.py | 6 ++-- dali/test/python/test_pipeline.py | 5 ++- dali/test/python/test_utils.py | 8 ++--- 17 files changed, 70 insertions(+), 93 deletions(-) diff --git a/dali/test/python/operator_1/test_arithmetic_ops.py b/dali/test/python/operator_1/test_arithmetic_ops.py index c6e0d04596..11d4892d8c 100644 --- a/dali/test/python/operator_1/test_arithmetic_ops.py +++ b/dali/test/python/operator_1/test_arithmetic_ops.py @@ -442,8 +442,8 @@ def get_numpy_input(input, kind, orig_type, target_type): def extract_un_data(pipe_out, sample_id, kind, target_type): - input = pipe_out[0].at(sample_id).as_cpu() - out = pipe_out[1].at(sample_id).as_cpu() + input = np.array(pipe_out[0].at(sample_id).as_cpu()) + out = np.array(pipe_out[1].at(sample_id).as_cpu()) assert_equals(out.dtype, target_type) in_np = get_numpy_input(input, kind, input.dtype.type, target_type) return in_np, out @@ -458,7 +458,7 @@ def extract_data(pipe_out, sample_id, kinds, target_type): arity = len(kinds) inputs = [] for i in range(arity): - dali_in = pipe_out[i].at(sample_id).as_cpu() + dali_in = np.array(pipe_out[i].at(sample_id).as_cpu()) numpy_in = get_numpy_input( dali_in, kinds[i], @@ -466,7 +466,7 @@ def extract_data(pipe_out, sample_id, kinds, target_type): target_type if target_type is not None else dali_in.dtype.type, ) inputs.append(numpy_in) - out = pipe_out[arity].at(sample_id).as_cpu() + out = np.array(pipe_out[arity].at(sample_id).as_cpu()) return tuple(inputs) + (out,) diff --git a/dali/test/python/operator_1/test_coin_flip.py b/dali/test/python/operator_1/test_coin_flip.py index d257f67a68..7239573cb1 100644 --- a/dali/test/python/operator_1/test_coin_flip.py +++ b/dali/test/python/operator_1/test_coin_flip.py @@ -51,8 +51,7 @@ def shape_gen_f(): outputs += [shape_out] pipe.set_outputs(*outputs) pipe.build() - outputs = pipe.run() - data_out = outputs[0].as_cpu() + data_out, = tuple(out.as_cpu() for out in pipe.run()) shapes_out = None if max_shape is not None: shapes_out = outputs[1].as_cpu() diff --git a/dali/test/python/operator_1/test_coord_flip.py b/dali/test/python/operator_1/test_coord_flip.py index b409a6891a..1cfac90d88 100644 --- a/dali/test/python/operator_1/test_coord_flip.py +++ b/dali/test/python/operator_1/test_coord_flip.py @@ -66,22 +66,20 @@ def check_operator_coord_flip(device, batch_size, layout, shape, center_x, cente pipe = CoordFlipPipeline(device, batch_size, iter(eii1), layout, center_x, center_y, center_z) pipe.build() for i in range(30): - outputs = pipe.run() + outputs0, outputs1, outputs2, outputs3, outputs4 = pipe.run() + outputs1 = outputs1.as_cpu() for sample in range(batch_size): - in_coords = outputs[0].at(sample) - if device == "gpu": - out_coords = outputs[1].at(sample).as_cpu() - else: - out_coords = outputs[1].at(sample) + in_coords = outputs0.at(sample) + out_coords = outputs1.at(sample) if in_coords.shape == () or in_coords.shape[0] == 0: assert out_coords.shape == () or out_coords.shape[0] == 0 continue - flip_x = outputs[2].at(sample) - flip_y = outputs[3].at(sample) + flip_x = outputs2.at(sample) + flip_y = outputs3.at(sample) flip_z = None if len(layout) == 3: - flip_z = outputs[4].at(sample) + flip_z = outputs4.at(sample) _, ndim = in_coords.shape flip_dim = [flip_x, flip_y] diff --git a/dali/test/python/operator_1/test_crop.py b/dali/test/python/operator_1/test_crop.py index bac1f2becf..cf130de455 100644 --- a/dali/test/python/operator_1/test_crop.py +++ b/dali/test/python/operator_1/test_crop.py @@ -580,9 +580,7 @@ def check_crop_with_out_of_bounds_policy_support( fill_values = 0 pipe.build() for _ in range(3): - outs = pipe.run() - out = outs[0].as_cpu() - in_data = outs[1].as_cpu() + out, in_data = tuple(out.as_cpu() for out in pipe.run()) assert batch_size == len(out) for idx in range(batch_size): sample_in = in_data.at(idx) diff --git a/dali/test/python/operator_1/test_crop_mirror_normalize.py b/dali/test/python/operator_1/test_crop_mirror_normalize.py index 23e5af2018..e273a61560 100644 --- a/dali/test/python/operator_1/test_crop_mirror_normalize.py +++ b/dali/test/python/operator_1/test_crop_mirror_normalize.py @@ -767,11 +767,8 @@ def check_cmn_with_out_of_bounds_policy_support( if fill_values is None: fill_values = 0 pipe.build() - for k in range(3): - outs = pipe.run() - out = outs[0] - in_data = outs[1] - mirror_data = outs[2] + for _ in range(3): + out, in_data, mirror_data = pipe.run() out = out.as_cpu() in_data = in_data.as_cpu() @@ -902,13 +899,12 @@ def pipe(): p = pipe(batch_size=batch_size) p.build() for _ in range(3): - outs = p.run() - for s in range(batch_size): - out, image_like, mean, std = [np.array(o[s].as_cpu()) for o in outs] + out, image_like, mean, std = tuple(out.as_cpu() for out in p.run()) ref_scale = scale or 1.0 ref_shift = shift or 0.0 ref_out = ref_scale * (image_like - mean) / std + ref_shift - np.testing.assert_allclose(out, ref_out, atol=ref_scale * 1e-6) + for s in range(batch_size): + np.testing.assert_allclose(out.at(s), ref_out.at(s), atol=ref_scale * 1e-6) def test_per_sample_norm_args(): diff --git a/dali/test/python/operator_1/test_input_promotion.py b/dali/test/python/operator_1/test_input_promotion.py index ed252988d1..a08ae4d408 100644 --- a/dali/test/python/operator_1/test_input_promotion.py +++ b/dali/test/python/operator_1/test_input_promotion.py @@ -44,9 +44,9 @@ def test_slice_fn(): out_gpu = fn.slice(src.gpu(), np.array([1, 1]), np.array([2, 1]), axes=[0, 1]) pipe.set_outputs(out_cpu, out_gpu) pipe.build() - o = pipe.run() - assert np.array_equal(o[0].at(0), np.array([[14], [17]])) - assert np.array_equal(o[1].at(0).as_cpu(), np.array([[14], [17]])) + out0, out1 = tuple(out.as_cpu() for out in pipe.run()) + assert np.array_equal(out0.at(0), np.array([[14], [17]])) + assert np.array_equal(np.array(out1.at(0)), np.array([[14], [17]])) def test_slice_ops(): @@ -60,9 +60,9 @@ def test_slice_ops(): out_gpu = slice_gpu(src.gpu(), np.array([1, 1]), np.array([2, 1])) pipe.set_outputs(out_cpu, out_gpu) pipe.build() - o = pipe.run() - assert np.array_equal(o[0].at(0), np.array([[14], [17]])) - assert np.array_equal(o[1].at(0).as_cpu(), np.array([[14], [17]])) + out0, out1 = tuple(out.as_cpu() for out in pipe.run()) + assert np.array_equal(out0.at(0), np.array([[14], [17]])) + assert np.array_equal(out1.at(0), np.array([[14], [17]])) def test_python_function(): diff --git a/dali/test/python/operator_1/test_pad.py b/dali/test/python/operator_1/test_pad.py index 95b24910de..ced09c907c 100644 --- a/dali/test/python/operator_1/test_pad.py +++ b/dali/test/python/operator_1/test_pad.py @@ -99,10 +99,8 @@ def check_pad(device, batch_size, input_max_shape, axes, axis_names, align, shap align = [align[0] for _ in actual_axes] assert len(align) == len(actual_axes) - for k in range(5): - out1, out2 = pipe.run() - - out1_data = out1.as_cpu() + for _ in range(5): + out0, out1 = tuple(out.as_cpu() for out in pipe.run()) max_shape = [-1] * len(input_max_shape) for i in range(len(actual_axes)): @@ -110,14 +108,13 @@ def check_pad(device, batch_size, input_max_shape, axes, axis_names, align, shap align_val = align[i] shape_arg_val = shape_arg[i] for i in range(batch_size): - input_shape = out1_data.at(i).shape + input_shape = out0.at(i).shape if input_shape[dim] > max_shape[dim]: max_shape[dim] = input_shape[dim] - out2_data = out2.as_cpu() for i in range(batch_size): - input_shape = out1_data.at(i).shape - output_shape = out2_data.at(i).shape + input_shape = out0.at(i).shape + output_shape = out1.at(i).shape for j in range(len(actual_axes)): dim = actual_axes[j] @@ -224,7 +221,7 @@ def check_pad_per_sample_shapes_and_alignment(device="cpu", batch_size=3, ndim=2 ) pipe.build() for _ in range(num_iter): - outs = [out.as_cpu() for out in pipe.run()] + outs = tuple(out.as_cpu() for out in pipe.run()) for i in range(batch_size): in_shape, in_data, req_shape, req_align, out_pad_shape, out_pad_align, out_pad_both = [ outs[out_idx].at(i) for out_idx in range(len(outs)) @@ -264,7 +261,7 @@ def check_pad_to_square(device="cpu", batch_size=3, ndim=2, num_iter=3): pipe.set_outputs(in_data, out_data) pipe.build() for _ in range(num_iter): - outs = [out.as_cpu() for out in pipe.run()] + outs = tuple(out.as_cpu() for out in pipe.run()) for i in range(batch_size): in_data, out_data = [outs[out_idx].at(i) for out_idx in range(len(outs))] in_shape = in_data.shape diff --git a/dali/test/python/operator_2/test_python_function.py b/dali/test/python/operator_2/test_python_function.py index 0b85f6bbca..81f80254c6 100644 --- a/dali/test/python/operator_2/test_python_function.py +++ b/dali/test/python/operator_2/test_python_function.py @@ -27,7 +27,7 @@ from PIL import Image, ImageEnhance from nvidia.dali.ops import _DataNode from nose2.tools import params - +import numpy as np from nose_utils import raises from test_utils import get_dali_extra_path, np_type_to_dali @@ -320,7 +320,7 @@ def test_python_operator_brightness(): (numpy_output,) = numpy_brightness.run() (dali_output,) = dali_brightness.run() for i in range(len(dali_output)): - assert numpy.allclose(numpy_output.at(i), dali_output.at(i).as_cpu(), rtol=1e-5, atol=1) + assert numpy.allclose(numpy_output.at(i), np.array(dali_output.at(i).as_cpu()), rtol=1e-5, atol=1) def invalid_function(image): diff --git a/dali/test/python/operator_2/test_remap.py b/dali/test/python/operator_2/test_remap.py index 80cd39d409..3697a01940 100644 --- a/dali/test/python/operator_2/test_remap.py +++ b/dali/test/python/operator_2/test_remap.py @@ -168,16 +168,14 @@ def _compare_pipelines_pixelwise(self, pipe1, pipe2, N_iterations, eps=0.01): pipe1.build() pipe2.build() for _ in range(N_iterations): - out1 = pipe1.run() - out2 = pipe2.run() + out1 = tuple(out.as_cpu() for out in pipe1.run()) + out2 = tuple(out.as_cpu() for out in pipe2.run()) self.assertTrue( len(out1) == len(out2), f"Numbers of outputs in the pipelines does not match: {len(out1)} vs {len(out2)}.", ) for i in range(len(out1)): - out1_data = out1[i].as_cpu() - out2_data = out2[i].as_cpu() - for sample1, sample2 in zip(out1_data, out2_data): + for sample1, sample2 in zip(out1[i], out2[i]): s1 = np.array(sample1) s2 = np.array(sample2) self.assertTrue( diff --git a/dali/test/python/operator_2/test_resize.py b/dali/test/python/operator_2/test_resize.py index a8c8099f16..4a789e7c85 100644 --- a/dali/test/python/operator_2/test_resize.py +++ b/dali/test/python/operator_2/test_resize.py @@ -870,7 +870,7 @@ def resize_pipe(): pipe = resize_pipe() pipe.build() (outs,) = pipe.run() - out = outs.at(0).as_cpu() + out = np.array(outs.at(0).as_cpu()) global large_data_resized if large_data_resized is None: large_data_resized = make_cube(350, 224, 224) diff --git a/dali/test/python/operator_2/test_resize_seq.py b/dali/test/python/operator_2/test_resize_seq.py index 81b2036800..ba79e31758 100644 --- a/dali/test/python/operator_2/test_resize_seq.py +++ b/dali/test/python/operator_2/test_resize_seq.py @@ -42,7 +42,7 @@ def init_video_data(): video_pipe.build() out = video_pipe.run() - in_seq = out[0].at(0).as_cpu() + in_seq = np.array(out[0].at(0).as_cpu()) return in_seq diff --git a/dali/test/python/operator_2/test_subscript.py b/dali/test/python/operator_2/test_subscript.py index 6a0619465c..fe1eb72925 100644 --- a/dali/test/python/operator_2/test_subscript.py +++ b/dali/test/python/operator_2/test_subscript.py @@ -34,23 +34,23 @@ def test_plain_indexing(): src = fn.external_source(lambda: data, layout="AB") pipe = index_pipe(src, lambda x: x[1, 1]) pipe.build() - inp, cpu, gpu = pipe.run() + inp, cpu, gpu = tuple(out.as_cpu() for out in pipe.run()) for i in range(len(inp)): x = inp.at(i) assert np.array_equal(x[1, 1], cpu.at(i)) - assert np.array_equal(x[1, 1], gpu.at(i).as_cpu()) + assert np.array_equal(x[1, 1], np.array(gpu.at(i))) def _test_indexing(data_gen, input_layout, output_layout, dali_index_func, ref_index_func=None): src = fn.external_source(data_gen, layout=input_layout) pipe = index_pipe(src, dali_index_func) pipe.build() - inp, cpu, gpu = pipe.run() + inp, cpu, gpu = tuple(out.as_cpu() for out in pipe.run()) for i in range(len(inp)): x = inp.at(i) ref = (ref_index_func or dali_index_func)(x) assert np.array_equal(ref, cpu.at(i)) - assert np.array_equal(ref, gpu.at(i).as_cpu()) + assert np.array_equal(ref, np.array(gpu.at(i))) assert cpu.layout() == output_layout assert gpu.layout() == output_layout @@ -90,11 +90,11 @@ def test_swapped_ends(): src = fn.external_source(lambda: data) pipe = index_pipe(src, lambda x: x[2:1]) pipe.build() - inp, cpu, gpu = pipe.run() + inp, cpu, gpu = tuple(out.as_cpu() for out in pipe.run()) for i in range(len(inp)): x = inp.at(i) assert np.array_equal(x[2:1], cpu.at(i)) - assert np.array_equal(x[2:1], gpu.at(i).as_cpu()) + assert np.array_equal(x[2:1], np.array(gpu.at(i))) def test_noop(): @@ -120,7 +120,7 @@ def data_gen(): j = 0 k = 0 for _ in range(4): - inp, cpu, gpu = pipe.run() + inp, cpu, gpu = tuple(out.as_cpu() for out in pipe.run()) for i in range(len(inp)): x = inp.at(i) # fmt: off @@ -129,7 +129,7 @@ def data_gen(): j = (j + 1) % len(lo_idxs) k = (k + 1) % len(hi_idxs) assert np.array_equal(ref, cpu.at(i)) - assert np.array_equal(ref, gpu.at(i).as_cpu()) + assert np.array_equal(ref, np.array(gpu.at(i))) def test_runtime_stride_dim1(): @@ -147,14 +147,14 @@ def data_gen(): j = 0 for _ in range(4): - inp, cpu, gpu = pipe.run() + inp, cpu, gpu = tuple(out.as_cpu() for out in pipe.run()) for i in range(len(inp)): x = inp.at(i) # fmt: off ref = x[::strides[j]] # fmt: on assert np.array_equal(ref, cpu.at(i)) - assert np.array_equal(ref, gpu.at(i).as_cpu()) + assert np.array_equal(ref, np.array(gpu.at(i))) j = (j + 1) % len(strides) @@ -173,14 +173,14 @@ def data_gen(): j = 0 for _ in range(4): - inp, cpu, gpu = pipe.run() + inp, cpu, gpu = tuple(out.as_cpu() for out in pipe.run()) for i in range(len(inp)): x = inp.at(i) # fmt: off ref = x[:, ::strides[j]] # fmt: on assert np.array_equal(ref, cpu.at(i)) - assert np.array_equal(ref, gpu.at(i).as_cpu()) + assert np.array_equal(ref, np.array(gpu.at(i))) j = (j + 1) % len(strides) @@ -300,11 +300,11 @@ def test_multiple_skipped_dims(): src = fn.external_source(lambda: data, layout="ABCD") pipe = index_pipe(src, lambda x: x[1, :, :, 1]) pipe.build() - inp, cpu, gpu = pipe.run() + inp, cpu, gpu = tuple(out.as_cpu() for out in pipe.run()) for i in range(len(inp)): x = inp.at(i) assert np.array_equal(x[1, :, :, 1], cpu.at(i)) - assert np.array_equal(x[1, :, :, 1], gpu.at(i).as_cpu()) + assert np.array_equal(x[1, :, :, 1], np.array(gpu.at(i))) def test_empty_slice(): @@ -312,8 +312,8 @@ def test_empty_slice(): src = fn.external_source(lambda: data) pipe = index_pipe(src, lambda x: x[0:0, 0:1]) pipe.build() - inp, cpu, gpu = pipe.run() + inp, cpu, gpu = tuple(out.as_cpu() for out in pipe.run()) for i in range(len(inp)): x = inp.at(i) assert np.array_equal(x[0:0, 0:1], cpu.at(i)) - assert np.array_equal(x[0:0, 0:1], gpu.at(i).as_cpu()) + assert np.array_equal(x[0:0, 0:1], np.array(gpu.at(i))) diff --git a/dali/test/python/operator_2/test_uniform.py b/dali/test/python/operator_2/test_uniform.py index 906bd477dd..beea84a680 100644 --- a/dali/test/python/operator_2/test_uniform.py +++ b/dali/test/python/operator_2/test_uniform.py @@ -23,10 +23,9 @@ def check_uniform_default(device="cpu", batch_size=32, shape=[1e5], val_range=No with pipe: pipe.set_outputs(dali.fn.random.uniform(device=device, range=val_range, shape=shape)) pipe.build() - for it in range(niter): - outputs = pipe.run() + for _ in range(niter): + data_out, = tuple(out.as_cpu() for out in pipe.run()) val_range = (-1.0, 1.0) if val_range is None else val_range - data_out = outputs[0].as_cpu() pvs = [] for i in range(batch_size): data = np.array(data_out[i]) @@ -65,9 +64,8 @@ def check_uniform_continuous_next_after(device="cpu", batch_size=32, shape=[1e5] with pipe: pipe.set_outputs(dali.fn.random.uniform(device=device, range=val_range, shape=shape)) pipe.build() - for it in range(niter): - outputs = pipe.run() - data_out = outputs[0].as_cpu() + for _ in range(niter): + data_out, = tuple(out.as_cpu() for out in pipe.run()) for i in range(batch_size): data = np.array(data_out[i]) assert (val_range[0] == data).all(), f"{data} is outside of requested range" @@ -86,9 +84,8 @@ def check_uniform_discrete(device="cpu", batch_size=32, shape=[1e5], values=None with pipe: pipe.set_outputs(dali.fn.random.uniform(device=device, values=values, shape=shape)) pipe.build() - for it in range(niter): - outputs = pipe.run() - data_out = outputs[0].as_cpu() + for _ in range(niter): + data_out, = tuple(out.as_cpu() for out in pipe.run()) values_set = set(values) maxval = np.max(values) bins = np.concatenate([values, np.array([np.nextafter(maxval, maxval + 1)])]) diff --git a/dali/test/python/operator_2/test_warp.py b/dali/test/python/operator_2/test_warp.py index 2f40922880..5364b2eb51 100644 --- a/dali/test/python/operator_2/test_warp.py +++ b/dali/test/python/operator_2/test_warp.py @@ -287,7 +287,7 @@ def get_data(): out = None try: - (out,) = pipe.run() + (out,) = tuple(out.as_cpu() for out in pipe.run()) except RuntimeError as e: if "bad_alloc" in str(e): print("Skipping test due to out-of-memory error:", e) @@ -296,10 +296,7 @@ def get_data(): except MemoryError as e: print("Skipping test due to out-of-memory error:", e) return - if device == "cpu": - out = out.at(0) - else: - out = out.at(0).as_cpu() + out = out.at(0) assert out.shape == (out_size, out_size, channels) for c in range(channels): assert out[0, 0, c] == c diff --git a/dali/test/python/test_optical_flow.py b/dali/test/python/test_optical_flow.py index 7a7cea8cf9..da845f5b6f 100644 --- a/dali/test/python/test_optical_flow.py +++ b/dali/test/python/test_optical_flow.py @@ -265,10 +265,10 @@ def check_optflow(output_grid=1, hint_grid=1, use_temporal_hints=False): raise SkipTest("Skipped as hint grid size is not supported for this arch") for _ in range(2): - out = pipe.run() + out0, out1 = tuple(out.as_cpu() for out in pipe.run()) for i in range(batch_size): - seq = out[0].at(i) - out_field = out[1].at(i).as_cpu()[0] + seq = out0.at(i) + out_field = out1.at(i)[0] _, ref_field = get_mapping(seq.shape[1:3]) dsize = (out_field.shape[1], out_field.shape[0]) ref_field = cv2.resize(ref_field, dsize=dsize, interpolation=cv2.INTER_AREA) diff --git a/dali/test/python/test_pipeline.py b/dali/test/python/test_pipeline.py index 010c6c0cdb..1d46c2e0ee 100644 --- a/dali/test/python/test_pipeline.py +++ b/dali/test/python/test_pipeline.py @@ -680,10 +680,9 @@ def define_graph(self): pipe = HybridPipe(batch_size=128, num_threads=2, device_id=0) pipe.build() - pipe_out = pipe.run() + _, orig_cpu, dali_output_batch = tuple(out.as_cpu() for out in pipe.run()) import cv2 - orig_cpu = pipe_out[1].as_cpu() for i in range(128): orig = orig_cpu.at(i) # apply 0.5 correction for opencv's not-so-good notion of pixel centers @@ -696,7 +695,7 @@ def define_graph(self): borderValue=(128, 128, 128), flags=(cv2.WARP_INVERSE_MAP + cv2.INTER_LINEAR), ) - dali_output = pipe_out[2].at(i).as_cpu() + dali_output = dali_output_batch.at(i) maxdif = np.max(cv2.absdiff(out, dali_output) / 255.0) assert maxdif < 0.025 diff --git a/dali/test/python/test_utils.py b/dali/test/python/test_utils.py index 55823c4f08..33863242b1 100644 --- a/dali/test/python/test_utils.py +++ b/dali/test/python/test_utils.py @@ -378,12 +378,10 @@ def compare_pipelines( pipe1.build() pipe2.build() for _ in range(N_iterations): - out1 = pipe1.run() - out2 = pipe2.run() + out1 = tuple(out.as_cpu() for out in pipe1.run()) + out2 = tuple(out.as_cpu() for out in pipe2.run()) assert len(out1) == len(out2) - for i in range(len(out1)): - out1_data = out1[i].as_cpu() - out2_data = out2[i].as_cpu() + for out1_data, out2_data in zip(out1, out2): if isinstance(expected_layout, tuple): current_expected_layout = expected_layout[i] else: