You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
types {
}
entry("main",
{as: [][][]f32},
{*[][][]f32})
entry_main (as_mem_5569 : mem@device,
n₁_5381 : i64,
n₃_5382 : i64,
m₄_5383 : i64,
as_5384 : [n₁_5381][n₃_5382][m₄_5383]f32@as_mem_5569->
{offset: 0i64;
strides: [mul_nw64 (m₄_5383) (n₃_5382), m₄_5383, 1i64];
shape: [n₁_5381, n₃_5382, m₄_5383]})
: {mem@device,
*[n₁_5381][n₃_5382][m₄_5383]f32@ ?0@device->
{offset: 0i64;
strides: [mul_nw64 (m₄_5383) (n₃_5382), m₄_5383, 1i64];
shape: [n₁_5381, n₃_5382, m₄_5383]}} = {
let {j_m_i_5505 : i64} =sub64(n₃_5382, 1i64)
let {empty_slice_5506 : bool} =eq_i64(j_m_i_5505, 0i64)
let {zero_leq_i_p_m_t_s_5507 : bool} =sle64(0i64, j_m_i_5505)
let {i_lte_j_5508 : bool} =sle64(1i64, n₃_5382)
let {loop_nonempty_5509 : bool} =slt64(0i64, n₃_5382)
let {loop_not_taken_5510 : bool} =notloop_nonempty_5509let {i_p_m_t_s_leq_w_5513 : bool} =slt64(j_m_i_5505, n₃_5382)
let {y_5514 : bool} =logand(zero_leq_i_p_m_t_s_5507, i_p_m_t_s_leq_w_5513)
let {forwards_ok_5515 : bool} =logand(i_lte_j_5508, y_5514)
let {ok_or_empty_5516 : bool} =logor(empty_slice_5506, forwards_ok_5515)
let {protect_assert_disj_5517 : bool} =logor(loop_not_taken_5510, ok_or_empty_5516)
let {index_certs_5518 : unit} =assert(protect_assert_disj_5517, {"Index [", 1i64 : i64, ":", n₃_5382 : i64, "] out of bounds for array of shape [", n₃_5382 : i64, "]."}, "tests/issue811.fut:7:13-19")
let {binop_y_5570 : i64} =sub_nw64(n₁_5381, 1i64)
let {binop_x_5572 : i64} =smax64(0i64, binop_y_5570)
let {binop_x_5574 : i64} =smax64(0i64, j_m_i_5505)
let {binop_y_5575 : i64} =mul_nw64(n₁_5381, m₄_5383)
let {binop_y_5576 : i64} =mul_nw64(binop_x_5574, binop_y_5575)
let {binop_y_5577 : i64} =smax64(0i64, binop_y_5576)
let {binop_x_5578 : i64} =add_nw64(binop_x_5572, binop_y_5577)
let {binop_y_5579 : i64} =sub_nw64(m₄_5383, 1i64)
let {binop_x_5580 : i64} =smax64(0i64, binop_y_5579)
let {binop_y_5581 : i64} =mul_nw64(n₁_5381, binop_x_5580)
let {binop_y_5582 : i64} =smax64(0i64, binop_y_5581)
let {binop_y_5583 : i64} =add_nw64(binop_x_5578, binop_y_5582)
let {binop_y_5584 : i64} =add_nw64(1i64, binop_y_5583)
let {bytes_5585 : i64} =mul_nw64(4i64, binop_y_5584)
let {mem_5586 : mem@device} =alloc(bytes_5585, @device)
let {mem_5620 : mem@device} =alloc(bytes_5585, @device)
let {binop_x_5621 : i64} =mul_nw64(n₁_5381, n₃_5382)
let {binop_x_5622 : i64} =mul_nw64(m₄_5383, binop_x_5621)
let {binop_y_5623 : i64} =mul_nw64(4i64, binop_x_5622)
let {bytes_5624 : i64} =smax64(0i64, binop_y_5623)
let {mem_5625 : mem@device} =alloc(bytes_5624, @device)
let {segmap_tblock_size_5554 : i64} =get_size(segmap_tblock_size_5538, thread_block_size)
let {num_tblocks_5555 : i64} =calc_num_tblocks(n₁_5381, segmap_num_tblocks_5540, segmap_tblock_size_5554)
let {as_coalesced_5568 : [n₁_5381][n₃_5382][m₄_5383]f32@mem_5586->
{offset: 0i64;
strides: [1i64, mul_nw64 (n₁_5381) (m₄_5383), n₁_5381];
shape: [n₁_5381, n₃_5382, m₄_5383]}} =manifest((1, 2, 0), as_5384)
let {ixfun_arg_5592 : i64} =mul_nw64(n₁_5381, m₄_5383)
let {binop_y_5593 : i64} =mul_nw64(4i64, m₄_5383)
let {bytes_5594 : i64} =smax64(0i64, binop_y_5593)
let {binop_x_5596 : i64} =mul_nw64(n₃_5382, m₄_5383)
let {binop_y_5597 : i64} =mul_nw64(4i64, binop_x_5596)
let {bytes_5598 : i64} =smax64(0i64, binop_y_5597)
let {defunc_0_map_res_5557 : [n₁_5381][n₃_5382][m₄_5383]f32@mem_5620->
{offset: 0i64;
strides: [1i64, mul_nw64 (n₁_5381) (m₄_5383), n₁_5381];
shape: [n₁_5381, n₃_5382, m₄_5383]}} =segmap(thread; virtualise; grid=num_tblocks_5555; blocksize=segmap_tblock_size_5554)
(gtid_5558<n₁_5381) (~phys_tid_5559) : {[n₃_5382][m₄_5383]f32} {
let {mem_5595 : mem@device} =alloc(bytes_5594, @device)
let {double_buffer_mem_5629 : mem@device} =alloc(bytes_5598, @device)
let {mem_5599 : mem@device} =alloc(bytes_5598, @device)
let {eta_p_5560 : [n₃_5382][m₄_5383]f32@mem_5586->
{offset: gtid_5558;
strides: [mul_nw64 (n₁_5381) (m₄_5383), n₁_5381];
shape: [n₃_5382, m₄_5383]}} =as_coalesced_5568[gtid_5558, 0i64 :+n₃_5382*1i64, 0i64 :+m₄_5383*1i64]
let {ext_mem_5603 : mem@device,
ext_5602 : i64,
ext_5601 : i64,
ext_5600 : i64,
tmp_5561 : [n₃_5382][m₄_5383]f32@ext_mem_5603->
{offset: ext_5602;
strides: [ext_5601, ext_5600];
shape: [n₃_5382, m₄_5383]}} =loop {mem_param_5591 : mem@device,
ctx_param_ext_5588 : i64,
ctx_param_ext_5589 : i64,
ctx_param_ext_5590 : i64,
A_5563 : [n₃_5382][m₄_5383]f32@mem_param_5591->
{offset: ctx_param_ext_5588;
strides: [ctx_param_ext_5589, ctx_param_ext_5590];
shape: [n₃_5382, m₄_5383]}} = {mem_5586, gtid_5558, ixfun_arg_5592, n₁_5381, eta_p_5560}
for_i_5562:i64<n₃_5382do {
let {irow_5564 : [m₄_5383]f32@mem_param_5591->
{offset: ctx_param_ext_5588;
strides: [ctx_param_ext_5590];
shape: [m₄_5383]}} =A_5563[0i64, 0i64 :+m₄_5383*1i64]
let {Ap_5565 : [j_m_i_5505][m₄_5383]f32@mem_param_5591->
{offset: add_nw64 (ctx_param_ext_5588) (ctx_param_ext_5589);
strides: [ctx_param_ext_5589, ctx_param_ext_5590];
shape: [j_m_i_5505, m₄_5383]}} =
#{index_certs_5518}
A_5563[1i64 :+j_m_i_5505*1i64, 0i64 :+m₄_5383*1i64]
let {concat_arg_5566 : [1i64][m₄_5383]f32@mem_5595->
{offset: 0i64;
strides: [m₄_5383, 1i64];
shape: [1i64, m₄_5383]}} =replicate([1i64], irow_5564)
let {++_res_5567 : [n₃_5382][m₄_5383]f32@mem_5599->
{offset: 0i64;
strides: [m₄_5383, 1i64];
shape: [n₃_5382, m₄_5383]}} =concat@0(n₃_5382, Ap_5565, concat_arg_5566)
let {double_buffer_array_5630 : [n₃_5382][m₄_5383]f32@double_buffer_mem_5629->
{offset: 0i64;
strides: [m₄_5383, 1i64];
shape: [n₃_5382, m₄_5383]}} =copy(++_res_5567)
in {double_buffer_mem_5629, 0i64, m₄_5383, 1i64, double_buffer_array_5630}
}
return {returnstmp_5561}
}
let {defunc_0_map_res_desired_form_5626 : [n₁_5381][n₃_5382][m₄_5383]f32@mem_5625->
{offset: 0i64;
strides: [mul_nw64 (m₄_5383) (n₃_5382), m₄_5383, 1i64];
shape: [n₁_5381, n₃_5382, m₄_5383]}} =manifest((0, 1, 2), defunc_0_map_res_5557)
in {mem_5625, defunc_0_map_res_desired_form_5626}
}
(This is syntactically invalid in master because it makes use of the simplified index functions from #2083.)
Short-circuiting performs a a short circuit of the concat/copy in the loop, storing the result directly in double_buffer_mem_5629. This is invalid, because double_buffer_mem_5629 is still used for live data (Ap_5565, an input to the concat), under the name of mem_param_5591 (a loop parameter).
All the aliases look right to me, so I'm not sure why this fails.
The text was updated successfully, but these errors were encountered:
This is not an urgent issue, as I rewrote double buffering entirely in a way that does not trigger this bug, but it would still be good not to leave around as a potential problem in the future.
Suppose we have a program like the following:
(This is syntactically invalid in master because it makes use of the simplified index functions from #2083.)
Short-circuiting performs a a short circuit of the
concat
/copy
in the loop, storing the result directly indouble_buffer_mem_5629
. This is invalid, becausedouble_buffer_mem_5629
is still used for live data (Ap_5565, an input to theconcat
), under the name ofmem_param_5591
(a loop parameter).All the aliases look right to me, so I'm not sure why this fails.
The text was updated successfully, but these errors were encountered: