diff --git a/test/test_loop_distribution.py b/test/test_loop_distribution.py new file mode 100644 index 000000000..4ec908193 --- /dev/null +++ b/test/test_loop_distribution.py @@ -0,0 +1,256 @@ +__copyright__ = "Copyright (C) 2021 Kaushik Kulkarni" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import sys +import numpy as np +import pyopencl as cl +import loopy as lp +from loopy.transform.loop_distribution import IllegalLoopDistributionError +import pytest + +import logging +logger = logging.getLogger(__name__) + +try: + import faulthandler +except ImportError: + pass +else: + faulthandler.enable() + +from pyopencl.tools import pytest_generate_tests_for_pyopencl \ + as pytest_generate_tests + +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa + +__all__ = [ + "pytest_generate_tests", + "cl" # "cl.create_some_context" + ] + + +def test_hello_loop_distribution(ctx_factory): + ctx = ctx_factory() + + t_unit = lp.make_kernel( + "{[i,j]: 0<=i, j<10}", + """ + for i + a[i] = 10 {id=w_a} + for j + b[i, j] = j*a[i] {id=w_b} + end + c[i] = 2*b[i, 5] {id=w_c} + end + """, + seq_dependencies=True) + + ref_t_unit = t_unit + + knl = lp.distribute_loops(t_unit.default_entrypoint, + insn_match="id:w_b", + outer_inames=frozenset()) + t_unit = t_unit.with_kernel(knl) + assert not (knl.id_to_insn["w_a"].within_inames + & knl.id_to_insn["w_b"].within_inames) + assert not (knl.id_to_insn["w_c"].within_inames + & knl.id_to_insn["w_b"].within_inames) + assert not (knl.id_to_insn["w_a"].within_inames + & knl.id_to_insn["w_c"].within_inames) + + lp.auto_test_vs_ref(ref_t_unit, ctx, t_unit) + + +def test_soundness_check(ctx_factory): + ctx = ctx_factory() + + # {{{ WAW deps + + tunit = lp.make_kernel( + "{[i]: 1<=i<10}", + """ + a[i] = i {id=first_w_a} + a[i-1] = i**2 {id=second_w_a, dep=first_w_a} + """ + ) + ref_tunit = tunit + + knl = lp.distribute_loops(tunit.default_entrypoint, + "id:second_w_a", + outer_inames=frozenset()) + tunit = tunit.with_kernel(knl) + assert not (knl.id_to_insn["first_w_a"].within_inames + & knl.id_to_insn["second_w_a"].within_inames) + lp.auto_test_vs_ref(ref_tunit, ctx, tunit) + + tunit = lp.make_kernel( + "{[i]: 0<=i<10}", + """ + a[i] = i {id=first_w_a} + a[i+1] = i**2 {id=second_w_a, dep=first_w_a} + """ + ) + + with pytest.raises(IllegalLoopDistributionError): + lp.distribute_loops(tunit.default_entrypoint, + "id:second_w_a", + outer_inames=frozenset()) + + # }}} + + # {{{ RAW deps + + tunit = lp.make_kernel( + "{[i]: 1<=i<10}", + """ + b[0] = 0 {id=first_w_b} + a[i] = i {id=first_w_a} + b[i] = 2*a[i-1] {id=second_w_b} + """, + seq_dependencies=True, + ) + ref_tunit = tunit + + knl = lp.distribute_loops(tunit.default_entrypoint, + "id:second_w_b", + outer_inames=frozenset()) + tunit = tunit.with_kernel(knl) + assert not (knl.id_to_insn["first_w_a"].within_inames + & knl.id_to_insn["second_w_b"].within_inames) + lp.auto_test_vs_ref(ref_tunit, ctx, tunit) + + tunit = lp.make_kernel( + "{[i]: 0<=i<10}", + """ + a[i] = i {id=first_w_a} + b[i] = 2*a[i+1] {id=first_w_b} + """, + seq_dependencies=True + ) + + with pytest.raises(IllegalLoopDistributionError): + lp.distribute_loops(tunit.default_entrypoint, + "id:first_w_b", + outer_inames=frozenset()) + + # }}} + + # {{{ WAR deps + + tunit = lp.make_kernel( + "{[i, j]: 0<=i<10 and 0<=j<11}", + """ + b[j] = j**2 + a[i] = b[i+1] {id=first_w_a} + b[i] = 2*a[i] {id=first_w_b} + """, + seq_dependencies=True + ) + ref_tunit = tunit + + knl = lp.distribute_loops(tunit.default_entrypoint, + "id:first_w_b", + outer_inames=frozenset()) + tunit = tunit.with_kernel(knl) + assert not (knl.id_to_insn["first_w_a"].within_inames + & knl.id_to_insn["first_w_b"].within_inames) + lp.auto_test_vs_ref(ref_tunit, ctx, tunit) + + tunit = lp.make_kernel( + "{[i]: 1<=i<10}", + """ + b[0] = 0 {id=first_w_b} + a[i] = b[i-1] {id=first_w_a} + b[i] = 2*a[i] {id=second_w_b} + """, + seq_dependencies=True, + ) + + with pytest.raises(IllegalLoopDistributionError): + lp.distribute_loops(tunit.default_entrypoint, + "id:second_w_b", + outer_inames=frozenset()) + + # }}} + + +def test_reduction_inames_get_duplicated(ctx_factory): + ctx = ctx_factory() + + tunit = lp.make_kernel( + "{[i, j]: 0<=i<100 and 0<=j<10}", + """ + out1[i] = sum(j, mat1[j] * x1[i, j]) {id=w_out1} + out2[i] = sum(j, mat2[j] * x2[i, j]) {id=w_out2} + """, + ) + tunit = lp.add_dtypes(tunit, {"mat1": np.float64, + "mat2": np.float64, + "x1": np.float64, + "x2": np.float64, + }) + + ref_tunit = tunit + + knl = lp.distribute_loops(tunit.default_entrypoint, + "id:w_out2", + outer_inames=frozenset()) + tunit = tunit.with_kernel(knl) + + assert not (knl.id_to_insn["w_out1"].within_inames + & knl.id_to_insn["w_out2"].within_inames) + assert not (knl.id_to_insn["w_out1"].reduction_inames() + & knl.id_to_insn["w_out2"].reduction_inames()) + lp.auto_test_vs_ref(ref_tunit, ctx, tunit) + + +def test_avoids_unnecessary_loop_distribution(ctx_factory): + ctx = ctx_factory() + tunit = lp.make_kernel( + "{[i]: 0 <= i < 10}", + """ + y0[i] = i {id=w_y0} + y1[i] = i**2 {id=w_y1} + y2[i] = y0[i] + i**3 {id=w_y2} + y3[i] = 2*y2[i] {id=w_y3} + y4[i] = i**4 + y1[i] {id=w_y4} + """) + ref_tunit = tunit + + knl = lp.distribute_loops(tunit.default_entrypoint, + insn_match="writes:y2 or writes:y4", + outer_inames=frozenset()) + tunit = tunit.with_kernel(knl) + + assert (knl.id_to_insn["w_y2"].within_inames + == knl.id_to_insn["w_y4"].within_inames) + lp.auto_test_vs_ref(ref_tunit, ctx, tunit) + + +if __name__ == "__main__": + if len(sys.argv) > 1: + exec(sys.argv[1]) + else: + from pytest import main + main([__file__]) + +# vim: fdm=marker