Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Vertical loop fusion and demotion of temporaries #374

Merged
merged 15 commits into from
Oct 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 14 additions & 13 deletions loki/batch/tests/test_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2887,21 +2887,22 @@ def test_pipeline_config_compose(config):
assert isinstance(pipeline, Pipeline)

# Check that the pipeline is correctly composed
assert len(pipeline.transformations) == 7
assert len(pipeline.transformations) == 8
assert type(pipeline.transformations[0]).__name__ == 'RemoveCodeTransformation'
assert type(pipeline.transformations[1]).__name__ == 'SCCBaseTransformation'
assert type(pipeline.transformations[2]).__name__ == 'SCCDevectorTransformation'
assert type(pipeline.transformations[3]).__name__ == 'SCCDemoteTransformation'
assert type(pipeline.transformations[4]).__name__ == 'SCCRevectorTransformation'
assert type(pipeline.transformations[5]).__name__ == 'SCCAnnotateTransformation'
assert type(pipeline.transformations[6]).__name__ == 'ModuleWrapTransformation'
assert type(pipeline.transformations[1]).__name__ == 'SCCFuseVerticalLoops'
assert type(pipeline.transformations[2]).__name__ == 'SCCBaseTransformation'
assert type(pipeline.transformations[3]).__name__ == 'SCCDevectorTransformation'
assert type(pipeline.transformations[4]).__name__ == 'SCCDemoteTransformation'
assert type(pipeline.transformations[5]).__name__ == 'SCCRevectorTransformation'
assert type(pipeline.transformations[6]).__name__ == 'SCCAnnotateTransformation'
assert type(pipeline.transformations[7]).__name__ == 'ModuleWrapTransformation'

# Check for some specified and default constructor flags
assert pipeline.transformations[0].call_names == ('dr_hook',)
assert pipeline.transformations[0].remove_imports is False
assert isinstance(pipeline.transformations[1].horizontal, Dimension)
assert pipeline.transformations[1].horizontal.size == 'KLON'
assert pipeline.transformations[1].horizontal.index == 'JL'
assert pipeline.transformations[1].directive == 'openacc'
assert pipeline.transformations[2].trim_vector_sections is True
assert pipeline.transformations[6].replace_ignore_items is True
assert isinstance(pipeline.transformations[2].horizontal, Dimension)
assert pipeline.transformations[2].horizontal.size == 'KLON'
assert pipeline.transformations[2].horizontal.index == 'JL'
assert pipeline.transformations[2].directive == 'openacc'
assert pipeline.transformations[3].trim_vector_sections is True
assert pipeline.transformations[7].replace_ignore_items is True
1 change: 1 addition & 0 deletions loki/transformations/single_column/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@
from loki.transformations.single_column.scc_cuf import * # noqa
from loki.transformations.single_column.vector import * # noqa
from loki.transformations.single_column.scc_low_level import * # noqa
from loki.transformations.single_column.vertical import * # noqa
5 changes: 4 additions & 1 deletion loki/transformations/single_column/scc.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from loki.transformations.single_column.vector import (
SCCDevectorTransformation, SCCDemoteTransformation, SCCRevectorTransformation
)

from loki.transformations.single_column.vertical import SCCFuseVerticalLoops

__all__ = [
'SCCVectorPipeline', 'SCCHoistPipeline', 'SCCStackPipeline', 'SCCRawStackPipeline'
Expand Down Expand Up @@ -75,6 +75,7 @@
"""
SCCVectorPipeline = partial(
Pipeline, classes=(
SCCFuseVerticalLoops,
SCCBaseTransformation,
SCCDevectorTransformation,
SCCDemoteTransformation,
Expand Down Expand Up @@ -121,6 +122,7 @@
"""
SCCHoistPipeline = partial(
Pipeline, classes=(
SCCFuseVerticalLoops,
SCCBaseTransformation,
SCCDevectorTransformation,
SCCDemoteTransformation,
Expand Down Expand Up @@ -166,6 +168,7 @@
"""
SCCStackPipeline = partial(
Pipeline, classes=(
SCCFuseVerticalLoops,
SCCBaseTransformation,
SCCDevectorTransformation,
SCCDemoteTransformation,
Expand Down
255 changes: 255 additions & 0 deletions loki/transformations/single_column/tests/test_scc_vertical.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,255 @@
# (C) Copyright 2018- ECMWF.
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.

import pytest

from loki import Subroutine, Dimension
from loki.frontend import available_frontends
from loki.ir import FindNodes, Loop, FindVariables
from loki.transformations.single_column import SCCFuseVerticalLoops


@pytest.fixture(scope='module', name='horizontal')
def fixture_horizontal():
    """
    Module-scoped horizontal :any:`Dimension` shared by the tests:
    size ``nlon``, index ``jl``, bounds ``start``/``end`` and the
    size alias ``nproma``.
    """
    spec = {
        'name': 'horizontal',
        'size': 'nlon',
        'index': 'jl',
        'bounds': ('start', 'end'),
        'aliases': ('nproma',),
    }
    return Dimension(**spec)

@pytest.fixture(scope='module', name='horizontal_bounds_aliases')
def fixture_horizontal_bounds_aliases():
    """
    Module-scoped horizontal :any:`Dimension` that additionally declares
    the derived-type bounds aliases ``bnds%start`` and ``bnds%end``.
    """
    spec = {
        'name': 'horizontal_bounds_aliases',
        'size': 'nlon',
        'index': 'jl',
        'bounds': ('start', 'end'),
        'aliases': ('nproma',),
        'bounds_aliases': ('bnds%start', 'bnds%end'),
    }
    return Dimension(**spec)

@pytest.fixture(scope='module', name='vertical')
def fixture_vertical():
    """
    Module-scoped vertical :any:`Dimension`: size ``nz``, index ``jk``
    and the size alias ``nlev``.
    """
    spec = {
        'name': 'vertical',
        'size': 'nz',
        'index': 'jk',
        'aliases': ('nlev',),
    }
    return Dimension(**spec)

@pytest.fixture(scope='module', name='blocking')
def fixture_blocking():
    """
    Module-scoped blocking :any:`Dimension`: size ``nb``, index ``b``.
    """
    spec = {'name': 'blocking', 'size': 'nb', 'index': 'b'}
    return Dimension(**spec)


@pytest.mark.parametrize('frontend', available_frontends())
def test_simple_scc_fuse_verticals_transformation(frontend, horizontal, vertical):
    """
    Fuse two annotated vertical loops of a small kernel and check that
    the temporaries ``temp_t`` and ``temp_q`` end up with a purely
    horizontal shape.
    """

    fcode_kernel = """
SUBROUTINE compute_column(start, end, nlon, nz, q, t)
INTEGER, INTENT(IN) :: start, end ! Iteration indices
INTEGER, INTENT(IN) :: nlon, nz ! Size of the horizontal and vertical
REAL, INTENT(INOUT) :: t(nlon,nz)
REAL, INTENT(INOUT) :: q(nlon,nz)
REAL :: temp_t(nlon, nz)
REAL :: temp_q(nlon, nz)
INTEGER :: jl, JK
REAL :: c

c = 5.345
!$loki loop-fusion group(1)
DO JK = 1, nz
DO jl = start, end
temp_t(jl, jk) = c
temp_q(jl, JK) = c
END DO
END DO

!$loki loop-fusion group(1)
DO jk = 2, nz
DO jl = start, end
t(jl, jk) = temp_t(jl, jk) * jk
q(jl, jk) = q(jl, jk-1) + t(jl, jk) * temp_q(jl, jk)
END DO
END DO

! The scaling is purposefully upper-cased
DO JL = START, END
Q(JL, NZ) = Q(JL, NZ) * C
END DO
END SUBROUTINE compute_column
"""
    kernel = Subroutine.from_source(fcode_kernel, frontend=frontend)

    # Before the transformation the kernel holds five loops: two vertical
    # loops, each wrapping one horizontal loop, plus the final scaling loop
    loops_before = FindNodes(Loop).visit(kernel.body)
    assert len(loops_before) == 5

    # 'compute_column' is not listed in apply_to, so this call must be a no-op
    skipped = SCCFuseVerticalLoops(vertical=vertical, apply_to=('another_kernel',))
    skipped.apply(kernel, role='kernel')
    # The loop count is therefore still five
    loops_after_noop = FindNodes(Loop).visit(kernel.body)
    assert len(loops_after_noop) == 5

    # Without apply_to every routine is dispatched: this performs the
    # actual loop fusion and demotion
    fuse = SCCFuseVerticalLoops(vertical=vertical)
    fuse.apply(kernel, role='kernel')

    # The two vertical loops collapsed into one, leaving four loops in total
    fused_loops = FindNodes(Loop).visit(kernel.body)
    assert len(fused_loops) == 4
    assert fused_loops[0].variable.name.lower() == 'jk'
    assert fused_loops[-1].variable.name.lower() == 'jl'
    assert sum(1 for loop in fused_loops if loop.variable.name.lower() == 'jk') == 1

    # Declarations of both temporaries only keep the horizontal extent
    declared = kernel.variable_map
    for temp_name in ('temp_t', 'temp_q'):
        assert declared[temp_name].shape == (horizontal.size,)

    # Every use of the temporaries in the body is indexed by the
    # horizontal index only
    for var in FindVariables().visit(kernel.body):
        if var.name.lower() not in ['temp_t', 'temp_q']:
            continue
        assert var.shape == (horizontal.size,)
        assert var.dimensions == (horizontal.index,)


@pytest.mark.parametrize('frontend', available_frontends())
@pytest.mark.parametrize('ignore', (False, True))
def test_scc_fuse_verticals_transformation(frontend, horizontal, vertical, ignore):
    """
    Test a more sophisticated example of vertical loop fusion and
    demotion of temporaries.

    The kernel contains two fusion groups, a loop-interchange pragma, an
    ``insert`` fusion target and a call between the groups. With
    ``ignore=True`` a ``!$loki k-caching ignore(temp_q2)`` pragma is
    emitted and ``temp_q2`` is expected to keep its two-dimensional
    shape; otherwise it is expected to be demoted.
    """

    fcode_kernel = f"""
SUBROUTINE compute_column(start, end, nlon, nz, q, t)
INTEGER, INTENT(IN) :: start, end ! Iteration indices
INTEGER, INTENT(IN) :: nlon, nz ! Size of the horizontal and vertical
REAL, INTENT(INOUT) :: t(nlon,nz)
REAL, INTENT(INOUT) :: q(nlon,nz)
REAL :: temp_t(nlon, nz)
REAL :: temp_t2(nlon, nz)
REAL :: temp_q(nlon, nz)
REAL :: temp_q2(nlon, nz)
REAL :: temp_cld(nlon, nz, 5)
INTEGER :: jl, jk, jm
REAL :: c

{'!$loki k-caching ignore(temp_q2)' if ignore else ''}

c = 5.345
!$loki loop-fusion group(1-init)
DO jk = 1, nz
DO jl = start, end
temp_t(jl, jk) = c
temp_q(jl, jk) = c
temp_t2(jl, jk) = 2*c
END DO
END DO

!$loki loop-fusion group(1)
!$loki loop-interchange
DO jm=1,5
DO jk = 1, nz
DO jl = start, end
temp_cld(jl, jk, jm) = 3.1415
END DO
END DO
END DO

DO jl = start, end
q(jl, jk) = 0.
END DO

!$loki loop-fusion group(1) insert
DO jk = 2, nz
DO jl = start, end
t(jl, jk) = temp_t(jl, jk) * temp_t2(jl, jk-1) * temp_cld(jl, jk, 1)
q(jl, jk) = q(jl, jk-1) + t(jl, jk) * temp_q(jl, jk)
END DO
END DO

CALL nested_kernel(start, end, nlon, nz, q)

!$loki loop-fusion group(2)
DO jk = 2, nz
DO jl = start, end
temp_q2(jl, jk) = 3.1415
END DO
END DO

!$loki loop-fusion group(2)
DO jk = 2, nz
DO jl = start, end
t(jl, jk) = t(jl, jk) + 3.1415
q(jl, jk) = q(jl, jk-1) + t(jl, jk) * temp_q(jl, jk) + temp_q2(jl, jk)
END DO
END DO

! The scaling is purposefully upper-cased
DO JL = START, END
Q(JL, NZ) = Q(JL, NZ) * C
END DO
END SUBROUTINE compute_column
"""


    kernel = Subroutine.from_source(fcode_kernel, frontend=frontend)

    # Ensure we have thirteen loops in the kernel prior to transformation
    kernel_loops = FindNodes(Loop).visit(kernel.body)
    assert len(kernel_loops) == 13
    SCCFuseVerticalLoops(vertical=vertical).apply(kernel, role='kernel')

    # After fusion twelve loops remain, three of which are vertical loops
    kernel_loops = FindNodes(Loop).visit(kernel.body)
    assert len(kernel_loops) == 12
    vertical_loops = [loop for loop in kernel_loops if loop.variable.name.lower() == vertical.index]
    assert len(vertical_loops) == 3

    shape1D = (horizontal.size,)
    shape2D = (horizontal.size, vertical.size)
    dimension1D = (horizontal.index,)
    dimension2D = (horizontal.index,vertical.index)
    dimension2DI1 = (horizontal.index, f'{vertical.index}-1')

    # Expected layout of ``temp_q2``. This is computed up front because
    # ``assert a == b if cond else c`` parses as
    # ``assert ((a == b) if cond else c)``, which is vacuously true for
    # any non-empty ``c``.
    temp_q2_shape = shape2D if ignore else shape1D
    temp_q2_dimensions = dimension2D if ignore else dimension1D

    vertical_loop_0_vars = FindVariables().visit(vertical_loops[0].body)
    vertical_loop_0_var_names = [var.name.lower() for var in vertical_loop_0_vars]
    vertical_loop_0_var_dict = dict(zip(vertical_loop_0_var_names, vertical_loop_0_vars))
    assert 'temp_t2' in vertical_loop_0_var_names
    assert 'temp_t' not in vertical_loop_0_var_names
    assert 'temp_q' not in vertical_loop_0_var_names
    assert 'temp_q2' not in vertical_loop_0_var_names
    assert 'temp_cld' not in vertical_loop_0_var_names
    assert vertical_loop_0_var_dict['temp_t2'].shape == shape2D
    assert vertical_loop_0_var_dict['temp_t2'].dimensions == dimension2D

    vertical_loop_1_vars = FindVariables().visit(vertical_loops[1].body)
    vertical_loop_1_var_names = [var.name.lower() for var in vertical_loop_1_vars]
    vertical_loop_1_var_dict = dict(zip(vertical_loop_1_var_names, vertical_loop_1_vars))
    assert 'temp_t2' in vertical_loop_1_var_names
    assert 'temp_t' in vertical_loop_1_var_names
    assert 'temp_q' in vertical_loop_1_var_names
    # Compare against the lower-cased names, like every sibling check
    assert 'temp_q2' not in vertical_loop_1_var_names
    assert 'temp_cld' in vertical_loop_1_var_names
    assert vertical_loop_1_var_dict['temp_t2'].shape == shape2D
    assert vertical_loop_1_var_dict['temp_t2'].dimensions == dimension2DI1
    assert vertical_loop_1_var_dict['temp_t'].shape == shape1D
    assert vertical_loop_1_var_dict['temp_t'].dimensions == dimension1D
    assert vertical_loop_1_var_dict['temp_q'].shape == shape2D
    assert vertical_loop_1_var_dict['temp_q'].dimensions == dimension2D
    assert vertical_loop_1_var_dict['temp_cld'].shape == shape1D + (5,)
    assert vertical_loop_1_var_dict['temp_cld'].dimensions in (dimension1D + (1,), dimension1D + ('jm',))

    vertical_loop_2_vars = FindVariables().visit(vertical_loops[2].body)
    vertical_loop_2_var_names = [var.name.lower() for var in vertical_loop_2_vars]
    vertical_loop_2_var_dict = dict(zip(vertical_loop_2_var_names, vertical_loop_2_vars))
    assert 'temp_t2' not in vertical_loop_2_var_names
    assert 'temp_t' not in vertical_loop_2_var_names
    assert 'temp_q' in vertical_loop_2_var_names
    assert 'temp_q2' in vertical_loop_2_var_names
    assert 'temp_cld' not in vertical_loop_2_var_names
    assert vertical_loop_2_var_dict['temp_q'].shape == shape2D
    assert vertical_loop_2_var_dict['temp_q'].dimensions == dimension2D
    assert vertical_loop_2_var_dict['temp_q2'].shape == temp_q2_shape
    assert vertical_loop_2_var_dict['temp_q2'].dimensions == temp_q2_dimensions

    kernel_var_map = kernel.variable_map
    assert kernel_var_map['temp_t'].shape == shape1D
    assert kernel_var_map['temp_t2'].shape == shape2D
    assert kernel_var_map['temp_q'].shape == shape2D
    assert kernel_var_map['temp_q2'].shape == temp_q2_shape
Loading
Loading