Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Invalid short circuiting #2084

Open
athas opened this issue Jan 13, 2024 · 3 comments
Open

Invalid short circuiting #2084

athas opened this issue Jan 13, 2024 · 3 comments
Assignees

Comments

@athas
Copy link
Member

athas commented Jan 13, 2024

Suppose we have a program like the following:

types {
  
}



entry("main",
      {as: [][][]f32},
      {*[][][]f32})
  entry_main (as_mem_5569 : mem@device,
              n_5381 : i64,
              n_5382 : i64,
              m_5383 : i64,
              as_5384 : [n_5381][n_5382][m_5383]f32 @ as_mem_5569 ->
                        {offset: 0i64;
                         strides: [mul_nw64 (m_5383) (n_5382), m_5383, 1i64];
                         shape: [n_5381, n_5382, m_5383]})
  : {mem@device,
     *[n_5381][n_5382][m_5383]f32 @ ?0@device ->
     {offset: 0i64;
      strides: [mul_nw64 (m_5383) (n_5382), m_5383, 1i64];
      shape: [n_5381, n_5382, m_5383]}} = {
  let {j_m_i_5505 : i64} =
    sub64(n_5382, 1i64)
  let {empty_slice_5506 : bool} =
    eq_i64(j_m_i_5505, 0i64)
  let {zero_leq_i_p_m_t_s_5507 : bool} =
    sle64(0i64, j_m_i_5505)
  let {i_lte_j_5508 : bool} =
    sle64(1i64, n_5382)
  let {loop_nonempty_5509 : bool} =
    slt64(0i64, n_5382)
  let {loop_not_taken_5510 : bool} =
    not loop_nonempty_5509
  let {i_p_m_t_s_leq_w_5513 : bool} =
    slt64(j_m_i_5505, n_5382)
  let {y_5514 : bool} =
    logand(zero_leq_i_p_m_t_s_5507, i_p_m_t_s_leq_w_5513)
  let {forwards_ok_5515 : bool} =
    logand(i_lte_j_5508, y_5514)
  let {ok_or_empty_5516 : bool} =
    logor(empty_slice_5506, forwards_ok_5515)
  let {protect_assert_disj_5517 : bool} =
    logor(loop_not_taken_5510, ok_or_empty_5516)
  let {index_certs_5518 : unit} =
    assert(protect_assert_disj_5517, {"Index [", 1i64 : i64, ":", n_5382 : i64, "] out of bounds for array of shape [", n_5382 : i64, "]."}, "tests/issue811.fut:7:13-19")
  let {binop_y_5570 : i64} =
    sub_nw64(n_5381, 1i64)
  let {binop_x_5572 : i64} =
    smax64(0i64, binop_y_5570)
  let {binop_x_5574 : i64} =
    smax64(0i64, j_m_i_5505)
  let {binop_y_5575 : i64} =
    mul_nw64(n_5381, m_5383)
  let {binop_y_5576 : i64} =
    mul_nw64(binop_x_5574, binop_y_5575)
  let {binop_y_5577 : i64} =
    smax64(0i64, binop_y_5576)
  let {binop_x_5578 : i64} =
    add_nw64(binop_x_5572, binop_y_5577)
  let {binop_y_5579 : i64} =
    sub_nw64(m_5383, 1i64)
  let {binop_x_5580 : i64} =
    smax64(0i64, binop_y_5579)
  let {binop_y_5581 : i64} =
    mul_nw64(n_5381, binop_x_5580)
  let {binop_y_5582 : i64} =
    smax64(0i64, binop_y_5581)
  let {binop_y_5583 : i64} =
    add_nw64(binop_x_5578, binop_y_5582)
  let {binop_y_5584 : i64} =
    add_nw64(1i64, binop_y_5583)
  let {bytes_5585 : i64} =
    mul_nw64(4i64, binop_y_5584)
  let {mem_5586 : mem@device} =
    alloc(bytes_5585, @device)
  let {mem_5620 : mem@device} =
    alloc(bytes_5585, @device)
  let {binop_x_5621 : i64} =
    mul_nw64(n_5381, n_5382)
  let {binop_x_5622 : i64} =
    mul_nw64(m_5383, binop_x_5621)
  let {binop_y_5623 : i64} =
    mul_nw64(4i64, binop_x_5622)
  let {bytes_5624 : i64} =
    smax64(0i64, binop_y_5623)
  let {mem_5625 : mem@device} =
    alloc(bytes_5624, @device)
  let {segmap_tblock_size_5554 : i64} =
    get_size(segmap_tblock_size_5538, thread_block_size)
  let {num_tblocks_5555 : i64} =
    calc_num_tblocks(n_5381, segmap_num_tblocks_5540, segmap_tblock_size_5554)
  let {as_coalesced_5568 : [n_5381][n_5382][m_5383]f32 @ mem_5586 ->
                           {offset: 0i64;
                            strides: [1i64, mul_nw64 (n_5381) (m_5383), n_5381];
                            shape: [n_5381, n_5382, m_5383]}} =
    manifest((1, 2, 0), as_5384)
  let {ixfun_arg_5592 : i64} =
    mul_nw64(n_5381, m_5383)
  let {binop_y_5593 : i64} =
    mul_nw64(4i64, m_5383)
  let {bytes_5594 : i64} =
    smax64(0i64, binop_y_5593)
  let {binop_x_5596 : i64} =
    mul_nw64(n_5382, m_5383)
  let {binop_y_5597 : i64} =
    mul_nw64(4i64, binop_x_5596)
  let {bytes_5598 : i64} =
    smax64(0i64, binop_y_5597)
  let {defunc_0_map_res_5557 : [n_5381][n_5382][m_5383]f32 @ mem_5620 ->
                               {offset: 0i64;
                                strides: [1i64, mul_nw64 (n_5381) (m_5383), n_5381];
                                shape: [n_5381, n_5382, m_5383]}} =
    segmap(thread; virtualise; grid=num_tblocks_5555; blocksize=segmap_tblock_size_5554)
    (gtid_5558 < n_5381) (~phys_tid_5559) : {[n_5382][m_5383]f32} {
      let {mem_5595 : mem@device} =
        alloc(bytes_5594, @device)
      let {double_buffer_mem_5629 : mem@device} =
        alloc(bytes_5598, @device)
      let {mem_5599 : mem@device} =
        alloc(bytes_5598, @device)
      let {eta_p_5560 : [n_5382][m_5383]f32 @ mem_5586 ->
                        {offset: gtid_5558;
                         strides: [mul_nw64 (n_5381) (m_5383), n_5381];
                         shape: [n_5382, m_5383]}} =
        as_coalesced_5568[gtid_5558, 0i64 :+ n_5382 * 1i64, 0i64 :+ m_5383 * 1i64]
      let {ext_mem_5603 : mem@device,
           ext_5602 : i64,
           ext_5601 : i64,
           ext_5600 : i64,
           tmp_5561 : [n_5382][m_5383]f32 @ ext_mem_5603 ->
                      {offset: ext_5602;
                       strides: [ext_5601, ext_5600];
                       shape: [n_5382, m_5383]}} =
        loop {mem_param_5591 : mem@device,
              ctx_param_ext_5588 : i64,
              ctx_param_ext_5589 : i64,
              ctx_param_ext_5590 : i64,
              A_5563 : [n_5382][m_5383]f32 @ mem_param_5591 ->
                       {offset: ctx_param_ext_5588;
                        strides: [ctx_param_ext_5589, ctx_param_ext_5590];
                        shape: [n_5382, m_5383]}} = {mem_5586, gtid_5558, ixfun_arg_5592, n_5381, eta_p_5560}
        for _i_5562:i64 < n_5382 do {
          let {irow_5564 : [m_5383]f32 @ mem_param_5591 ->
                           {offset: ctx_param_ext_5588;
                            strides: [ctx_param_ext_5590];
                            shape: [m_5383]}} =
            A_5563[0i64, 0i64 :+ m_5383 * 1i64]
          let {Ap_5565 : [j_m_i_5505][m_5383]f32 @ mem_param_5591 ->
                         {offset: add_nw64 (ctx_param_ext_5588) (ctx_param_ext_5589);
                          strides: [ctx_param_ext_5589, ctx_param_ext_5590];
                          shape: [j_m_i_5505, m_5383]}} =
            #{index_certs_5518}
            A_5563[1i64 :+ j_m_i_5505 * 1i64, 0i64 :+ m_5383 * 1i64]
          let {concat_arg_5566 : [1i64][m_5383]f32 @ mem_5595 ->
                                 {offset: 0i64;
                                  strides: [m_5383, 1i64];
                                  shape: [1i64, m_5383]}} =
            replicate([1i64], irow_5564)
          let {++_res_5567 : [n_5382][m_5383]f32 @ mem_5599 ->
                             {offset: 0i64;
                              strides: [m_5383, 1i64];
                              shape: [n_5382, m_5383]}} =
            concat@0(n_5382, Ap_5565, concat_arg_5566)
          let {double_buffer_array_5630 : [n_5382][m_5383]f32 @ double_buffer_mem_5629 ->
                                          {offset: 0i64;
                                           strides: [m_5383, 1i64];
                                           shape: [n_5382, m_5383]}} =
            copy(++_res_5567)
          in {double_buffer_mem_5629, 0i64, m_5383, 1i64, double_buffer_array_5630}
        }
      return {returns tmp_5561}
    }
  let {defunc_0_map_res_desired_form_5626 : [n_5381][n_5382][m_5383]f32 @ mem_5625 ->
                                            {offset: 0i64;
                                             strides: [mul_nw64 (m_5383) (n_5382), m_5383, 1i64];
                                             shape: [n_5381, n_5382, m_5383]}} =
    manifest((0, 1, 2), defunc_0_map_res_5557)
  in {mem_5625, defunc_0_map_res_desired_form_5626}
}

(This is syntactically invalid in master because it makes use of the simplified index functions from #2083.)

Short-circuiting performs a a short circuit of the concat/copy in the loop, storing the result directly in double_buffer_mem_5629. This is invalid, because double_buffer_mem_5629 is still used for live data (Ap_5565, an input to the concat), under the name of mem_param_5591 (a loop parameter).

All the aliases look right to me, so I'm not sure why this fails.

@Munksgaard
Copy link
Collaborator

I can take a look, but it won't be until tomorrow evening.

@athas
Copy link
Member Author

athas commented Jan 13, 2024

Enjoy!

@athas
Copy link
Member Author

athas commented Jan 13, 2024

This is not an urgent issue, as I rewrote double buffering entirely in a way that does not trigger this bug, but it would still be good not to leave around as a potential problem in the future.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

2 participants