Skip to content

Commit

Permalink
Merge pull request #329 from ecmwf-ifs/naan-split-read-write
Browse files Browse the repository at this point in the history
Split reads and writes for certain accumulation patterns
  • Loading branch information
mlange05 authored Aug 14, 2024
2 parents a0dade4 + ad1d51a commit cb2d5d5
Show file tree
Hide file tree
Showing 4 changed files with 336 additions and 1 deletion.
1 change: 1 addition & 0 deletions loki/transformations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,4 @@
from loki.transformations.pool_allocator import * # noqa
from loki.transformations.utilities import * # noqa
from loki.transformations.block_index_transformations import * # noqa
from loki.transformations.split_read_write import * # noqa
166 changes: 166 additions & 0 deletions loki/transformations/split_read_write.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
# (C) Copyright 2018- ECMWF.
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.


from loki.batch import Transformation, ProcedureItem
from loki.expression import Array, SubstituteExpressions
from loki.tools import as_tuple
from loki.ir import (
pragma_regions_attached, is_loki_pragma, nodes as ir, FindNodes, Transformer
)

__all__ = ['SplitReadWriteTransformation']

class SplitReadWriteWalk(Transformer):
    """
    In-place :any:`Transformer` that turns the read-write assignments of a
    ``!$loki split-read-write`` region into reads to temporaries, and records
    the node map from which the matching writes are generated afterwards.

    Parameters
    ----------
    dimensions : list
        A list of :any:`Dimension` objects corresponding to all :any:`Loop`s in
        the ``!$loki split-read-write`` region.
    variable_map : dict
        The variable_map of the parent :any:`Subroutine`.
    count : int
        A running count of the newly created temporaries in the parent
        :any:`Subroutine` so that temporaries created by previous
        ``!$loki split-read-write`` regions are not redefined.
    """

    def __init__(self, dimensions, variable_map, count=-1, **kwargs):
        self.dimensions = dimensions
        # variable_map of the parent subroutine
        self.variable_map = variable_map
        # index of the most recently created temporary
        self.temp_count = count

        # original LHS -> temporary that replaces it
        self.lhs_var_map = {}
        # temporaries created by this walk, in creation order
        self.tmp_vars = []
        # visited node -> replacement in the second (write) copy of the region
        self.write_map = {}

        kwargs['inplace'] = True
        super().__init__(**kwargs)

    def _temporary_layout(self, lhs, dim_nest):
        """
        Return the ``(shape, dimensions)`` tuples for the temporary replacing ``lhs``.

        Only those entries of the shape of ``lhs`` are kept that belong to a known
        dimension whose loop is part of the enclosing ``dim_nest``.
        """
        shape = []
        indices = []
        for extent in lhs.type.shape:
            owner = next((d for d in self.dimensions if extent in d.size_expressions), None)
            if owner is not None and owner in dim_nest:
                shape.append(self.variable_map[owner.size])
                indices.append(self.variable_map[owner.index])
        return as_tuple(shape), as_tuple(indices)

    def visit_Loop(self, o, **kwargs):
        """Extend the ``dim_nest`` with the dimension(s) indexed by this loop and recurse."""
        enclosing = kwargs.pop('dim_nest', [])
        matching = [d for d in self.dimensions if d.index == o.variable]
        return super().visit_Node(o, dim_nest=enclosing + matching, **kwargs)

    def visit_Assignment(self, o, **kwargs):
        """Redirect a read-write array assignment to a temporary and register its write."""
        dim_nest = kwargs.pop('dim_nest', [])

        # scalars and assignments that do not read their own LHS stay as they are
        # and are mapped to ``None`` for the second copy of the region
        if not (isinstance(o.lhs, Array) and o.lhs.name in o.rhs):
            self.write_map[o] = None
            return o

        # earlier temporaries replace the arrays they stand in for on the RHS
        new_rhs = SubstituteExpressions(self.lhs_var_map).visit(o.rhs)

        write = None
        if o.lhs not in self.lhs_var_map:
            shape, indices = self._temporary_layout(o.lhs, dim_nest)

            # define var to store temporary assignment
            self.temp_count += 1
            tmp_type = o.lhs.type.clone(shape=shape, intent=None)
            tmp_var = o.lhs.clone(name=f'loki_temp_{self.temp_count}',
                                  dimensions=indices, type=tmp_type)
            self.lhs_var_map[o.lhs] = tmp_var
            self.tmp_vars.append(tmp_var)

            # only the first assignment to a given LHS yields a write-back
            write = as_tuple(ir.Assignment(lhs=o.lhs, rhs=tmp_var))

        o._update(lhs=self.lhs_var_map[o.lhs], rhs=new_rhs)

        self.write_map[o] = write
        return o

    def visit_LeafNode(self, o, **kwargs):
        """Map every other leaf node to ``None`` so it is absent from the second copy."""
        self.write_map[o] = None
        return super().visit_Node(o, **kwargs)

class SplitReadWriteTransformation(Transformation):
    """
    When accumulating values to multiple components of an array, a compiler cannot rule out
    the possibility that the indices alias the same address. Consider for example the following
    code:

    .. code-block:: fortran

        !$loki split-read-write
        do jlon=1,nproma
           var(jlon, n1) = var(jlon, n1) + 1.
           var(jlon, n2) = var(jlon, n2) + 1.
        enddo
        !$loki end split-read-write

    In the above example, there is no guarantee that ``n1`` and ``n2`` do not in fact point to the
    same location. Therefore the load and store instructions for ``var`` have to be executed in order.

    For cases where the user knows ``n1`` and ``n2`` indeed represent distinct locations, this
    transformation provides a pragma assisted mechanism to split the reads and writes, and therefore
    make the loads independent from the stores. The above code would therefore be transformed to:

    .. code-block:: fortran

        !$loki split-read-write
        do jlon=1,nproma
           loki_temp_0(jlon) = var(jlon, n1) + 1.
           loki_temp_1(jlon) = var(jlon, n2) + 1.
        enddo

        do jlon=1,nproma
           var(jlon, n1) = loki_temp_0(jlon)
           var(jlon, n2) = loki_temp_1(jlon)
        enddo
        !$loki end split-read-write

    Parameters
    ----------
    dimensions : list
        A list of :any:`Dimension` objects corresponding to all :any:`Loop`s in the
        ``!$loki split-read-write`` region.
    """

    item_filter = (ProcedureItem,)

    def __init__(self, dimensions):
        self.dimensions = as_tuple(dimensions)

    def transform_subroutine(self, routine, **kwargs):
        """
        Split reads and writes in every ``!$loki split-read-write`` region of ``routine``.

        Each matching region is rewritten in place to assign to temporaries, a
        second copy of the region holding the write-backs is appended to it,
        and the temporaries are declared in ``routine``.

        Parameters
        ----------
        routine : :any:`Subroutine`
            The subroutine to transform.
        """

        # cache variable_map for fast lookup later
        variable_map = routine.variable_map
        # index of the last temporary created so far; -1 means "none yet"
        temp_counter = -1
        tmp_vars = []

        # find split read-write pragmas
        with pragma_regions_attached(routine):
            for region in FindNodes(ir.PragmaRegion).visit(routine.body):
                if not is_loki_pragma(region.pragma, starts_with='split-read-write'):
                    continue

                transformer = SplitReadWriteWalk(self.dimensions, variable_map, count=temp_counter)
                transformer.visit(region.body)

                # ``temp_count`` was seeded with ``temp_counter`` and is therefore
                # already the running index; adding it on top of the previous value
                # would leave ever growing gaps in the temporary numbering
                temp_counter = transformer.temp_count
                tmp_vars += transformer.tmp_vars

                if transformer.write_map:
                    new_writes = Transformer(transformer.write_map).visit(region.body)
                    region.append(new_writes)

        # add declarations for temporaries
        if tmp_vars:
            # de-duplicate while keeping creation order, so that the generated
            # declarations come out in the same order on every run
            declarations = dict.fromkeys(
                var.clone(dimensions=var.type.shape) for var in tmp_vars
            )
            routine.variables += tuple(declarations)
166 changes: 166 additions & 0 deletions loki/transformations/tests/test_split_read_write.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
# (C) Copyright 2018- ECMWF.
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.

import pytest

from loki.expression import FindVariables
from loki import Dimension, Subroutine
from loki.ir import FindNodes, nodes as ir, pragma_regions_attached, is_loki_pragma
from loki.frontend import available_frontends
from loki.transformations import SplitReadWriteTransformation


@pytest.fixture(scope='module', name='horizontal')
def fixture_horizontal():
    """Module-scoped ``horizontal`` :any:`Dimension` shared by the tests in this file."""
    return Dimension(
        name='horizontal',
        size='nlon',
        index='jl',
        bounds=('start', 'end'),
        aliases=('nproma',),
    )

@pytest.fixture(scope='module', name='vertical')
def fixture_vertical():
    """Module-scoped ``vertical`` :any:`Dimension` shared by the tests in this file."""
    return Dimension(
        name='vertical',
        size='nz',
        index='jk',
        aliases=('nlev',),
    )


@pytest.mark.parametrize('frontend', available_frontends())
def test_split_read_write(frontend, horizontal, vertical):
    """
    Check that ``!$loki split-read-write`` regions are split into a copy that
    reads into temporaries and a copy that writes the temporaries back.
    """

    fcode = """
subroutine kernel(nlon, nz, start, end, n1, n2, n3, var0, var1, var2, nfre)
implicit none
integer, intent(in) :: nlon, nz, n1, n2, n3, start, end, nfre
real, intent(inout) :: var0(nlon,nfre,6), var1(nlon, nz, 6), var2(nlon,nz)
integer :: jl, jk, m
!$loki split-read-write
do jk = 1,nz
do jl = start,end
var1(jl, jk, n1) = var1(jl, jk, n1) + 1.
var1(jl, jk, n1) = var1(jl, jk, n1) * 2.
var1(jl, jk, n2) = var1(jl, jk, n2) + var1(jl, jk, n1)
var2(jl, jk ) = 0.
end do
end do
print *, "a leaf node that shouldn't be copied"
!$loki end split-read-write
!.....should be transformed to........
!!$loki split-read-write
! do jk=1,nz
! do jl=start,end
! loki_temp_0(jl, jk) = var1(jl, jk, n1) + 1.
! loki_temp_0(jl, jk) = loki_temp_0(jl, jk)*2.
! loki_temp_1(jl, jk) = var1(jl, jk, n2) + loki_temp_0(jl, jk)
! var2(jl, jk) = 0.
! end do
! end do
! print *, 'a leaf node that shouldn''t be copied'
! do jk=1,nz
! do jl=start,end
! var1(jl, jk, n1) = loki_temp_0(jl, jk)
! var1(jl, jk, n2) = loki_temp_1(jl, jk)
! end do
! end do
!!$loki end split-read-write
do m = 1,nfre
!$loki split-read-write
if( m < nfre/2 )then
do jl = start,end
var0(jl, m, n3) = var0(jl, m, n3) + 1.
end do
endif
!$loki end split-read-write
!.....should be transformed to........
!!$loki split-read-write
! if (m < nfre / 2) then
! do jl=start,end
! loki_temp_2(jl) = var0(jl, m, n3) + 1.
! end do
! end if
! if (m < nfre / 2) then
! do jl=start,end
! var0(jl, m, n3) = loki_temp_2(jl)
! end do
! end if
!!$loki end split-read-write
end do
end subroutine kernel
"""

    routine = Subroutine.from_source(fcode, frontend=frontend)
    trafo = SplitReadWriteTransformation(dimensions=(horizontal, vertical))
    trafo.apply(routine)

    with pragma_regions_attached(routine):

        regions = FindNodes(ir.PragmaRegion).visit(routine.body)
        assert len(regions) == 2

        # ---------------- first pragma region ---------------- #
        region = regions[0]
        assert is_loki_pragma(region.pragma, starts_with='split-read-write')

        # both temporaries must have been declared
        assert 'loki_temp_0(nlon,nz)' in routine.variables
        assert 'loki_temp_1(nlon,nz)' in routine.variables

        # the loop nest must appear twice, each time as jk -> jl
        jk_loops = [loop for loop in FindNodes(ir.Loop).visit(region.body) if loop.variable == 'jk']
        assert len(jk_loops) == 2
        for jk_loop in jk_loops:
            inner = FindNodes(ir.Loop).visit(jk_loop.body)
            assert len(inner) == 1
            assert inner[0].variable == 'jl'

        # the plain assignment lives in the first copy of the region only
        assert 'var2(jl,jk)' in FindVariables().visit(jk_loops[0])
        assert 'var2(jl,jk)' not in FindVariables().visit(jk_loops[1])

        # the print statement lives in the first copy of the region only
        assert len(FindNodes(ir.Intrinsic).visit(region)) == 1

        # reads: every read-write assignment now targets a temporary
        reads = FindNodes(ir.Assignment).visit(jk_loops[0].body)
        assert len(reads) == 4
        assert reads[0].lhs == reads[1].lhs
        assert reads[1].rhs == f'{reads[0].lhs}*2.'
        assert reads[2].lhs != reads[0].lhs
        assert reads[2].lhs.dimensions == reads[0].lhs.dimensions
        assert f'{reads[0].lhs}' in reads[2].rhs

        # writes: one write-back per distinct original LHS
        writes = FindNodes(ir.Assignment).visit(jk_loops[1].body)
        assert len(writes) == 2
        assert writes[0].lhs == 'var1(jl, jk, n1)'
        assert writes[1].lhs == 'var1(jl, jk, n2)'
        assert writes[0].rhs == reads[0].lhs
        assert writes[1].rhs == reads[2].lhs

        # ---------------- second pragma region ---------------- #
        region = regions[1]
        assert is_loki_pragma(region.pragma, starts_with='split-read-write')

        conditionals = FindNodes(ir.Conditional).visit(region.body)
        assert len(conditionals) == 2

        # the temporary must have been declared
        assert 'loki_temp_2(nlon)' in routine.variables

        # reads
        reads = FindNodes(ir.Assignment).visit(conditionals[0])
        assert len(reads) == 1
        assert reads[0].lhs == 'loki_temp_2(jl)'
        assert 'var0(jl, m, n3)' in reads[0].rhs

        # writes
        writes = FindNodes(ir.Assignment).visit(conditionals[1])
        assert len(writes) == 1
        assert writes[0].lhs == 'var0(jl, m, n3)'
        assert writes[0].rhs == 'loki_temp_2(jl)'
4 changes: 3 additions & 1 deletion scripts/loki_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,9 @@ def convert(
mode = mode.replace('-', '_') # Sanitize mode string

# Write out all modified source files into the build directory
file_write_trafo = FileWriteTransformation(builddir=build, mode=mode)
file_write_trafo = scheduler.config.transformations.get('FileWriteTransformation', None)
if not file_write_trafo:
file_write_trafo = FileWriteTransformation(builddir=build, mode=mode)
scheduler.process(transformation=file_write_trafo)

return
Expand Down

0 comments on commit cb2d5d5

Please sign in to comment.