-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #329 from ecmwf-ifs/naan-split-read-write
Split reads and writes for certain accumulation patterns
- Loading branch information
Showing
4 changed files
with
336 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
# (C) Copyright 2018- ECMWF. | ||
# This software is licensed under the terms of the Apache Licence Version 2.0 | ||
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. | ||
# In applying this licence, ECMWF does not waive the privileges and immunities | ||
# granted to it by virtue of its status as an intergovernmental organisation | ||
# nor does it submit to any jurisdiction. | ||
|
||
|
||
from loki.batch import Transformation, ProcedureItem | ||
from loki.expression import Array, SubstituteExpressions | ||
from loki.tools import as_tuple | ||
from loki.ir import ( | ||
pragma_regions_attached, is_loki_pragma, nodes as ir, FindNodes, Transformer | ||
) | ||
|
||
__all__ = ['SplitReadWriteTransformation'] | ||
|
||
class SplitReadWriteWalk(Transformer):
    """
    An in-place :any:`Transformer` that rewrites read-write array assignments
    into reads that store to temporaries, and records the matching write-backs.

    After a traversal, ``write_map`` maps every visited leaf node either to the
    write-back assignment that takes its place in the second copy of the region,
    or to ``None`` if the node must not appear in that second copy.

    Parameters
    ----------
    dimensions : list
        A list of :any:`Dimension` objects corresponding to all :any:`Loop`s in the
        ``!$loki split-read-write`` region.
    variable_map : dict
        The variable_map of the parent :any:`Subroutine`.
    count : int
        A running count of the newly created temporaries in the parent :any:`Subroutine` so that
        temporaries created by previous ``!$loki split-read-write`` regions are not redefined.
    """

    def __init__(self, dimensions, variable_map, count=-1, **kwargs):
        # dimensions whose loops may contribute to the shape of a temporary
        self.dimensions = dimensions
        # parent subroutine variable_map
        self.variable_map = variable_map

        # index of the most recently created temporary
        self.temp_count = count
        # temporaries created by this walk, in order of creation
        self.tmp_vars = []
        # original read-write LHS -> temporary that replaces it
        self.lhs_var_map = {}
        # visited leaf node -> write-back tuple, or None to drop the node
        self.write_map = {}

        # the region is always modified in place
        super().__init__(**{**kwargs, 'inplace': True})

    def visit_Loop(self, o, **kwargs):
        """
        Descend into a loop, extending ``dim_nest`` with the dimension (if any)
        whose index is the loop variable.
        """
        enclosing = kwargs.pop('dim_nest', [])
        matching = [dim for dim in self.dimensions if dim.index == o.variable]
        return super().visit_Node(o, dim_nest=enclosing + matching, **kwargs)

    def _make_temporary(self, lhs, dim_nest):
        """
        Create, register and return the temporary that replaces ``lhs``.

        Only those entries of the shape of ``lhs`` that match the size of a
        dimension in ``dim_nest`` contribute to the shape of the temporary.
        """
        shape, indices = [], []
        for extent in lhs.type.shape:
            candidates = [dim for dim in self.dimensions if extent in dim.size_expressions]
            if candidates and candidates[0] in dim_nest:
                shape.append(self.variable_map[candidates[0].size])
                indices.append(self.variable_map[candidates[0].index])

        self.temp_count += 1
        temp_type = lhs.type.clone(shape=as_tuple(shape), intent=None)
        temporary = lhs.clone(
            name=f'loki_temp_{self.temp_count}', dimensions=as_tuple(indices), type=temp_type
        )

        self.lhs_var_map[lhs] = temporary
        self.tmp_vars.append(temporary)
        return temporary

    def visit_Assignment(self, o, **kwargs):
        """
        Redirect a read-write array assignment to a temporary and record its write-back.
        """
        enclosing = kwargs.pop('dim_nest', [])

        # scalars and assignments that do not read their own LHS stay as they are,
        # and are dropped from the second copy of the region
        if not (isinstance(o.lhs, Array) and o.lhs.name in o.rhs):
            self.write_map[o] = None
            return o

        # substitute using the temporaries known *before* this assignment
        new_rhs = SubstituteExpressions(self.lhs_var_map).visit(o.rhs)

        # only the first assignment to a given LHS yields a write-back
        write_back = None
        if o.lhs not in self.lhs_var_map:
            temporary = self._make_temporary(o.lhs, enclosing)
            write_back = as_tuple(ir.Assignment(lhs=o.lhs, rhs=temporary))

        o._update(lhs=self.lhs_var_map[o.lhs], rhs=new_rhs)
        self.write_map[o] = write_back
        return o

    def visit_LeafNode(self, o, **kwargs):
        """
        Mark any other leaf node for removal from the second copy of the region.
        """
        self.write_map[o] = None
        return super().visit_Node(o, **kwargs)
|
||
class SplitReadWriteTransformation(Transformation):
    """
    When accumulating values to multiple components of an array, a compiler cannot rule out
    the possibility that the indices alias the same address. Consider for example the following
    code:

    .. code-block:: fortran

        !$loki split-read-write
        do jlon=1,nproma
           var(jlon, n1) = var(jlon, n1) + 1.
           var(jlon, n2) = var(jlon, n2) + 1.
        enddo
        !$loki end split-read-write

    In the above example, there is no guarantee that ``n1`` and ``n2`` do not in fact point to the same location.
    Therefore the load and store instructions for ``var`` have to be executed in order.

    For cases where the user knows ``n1`` and ``n2`` indeed represent distinct locations, this transformation
    provides a pragma assisted mechanism to split the reads and writes, and therefore make the loads independent
    from the stores. The above code would therefore be transformed to:

    .. code-block:: fortran

        !$loki split-read-write
        do jlon=1,nproma
           loki_temp_0(jlon) = var(jlon, n1) + 1.
           loki_temp_1(jlon) = var(jlon, n2) + 1.
        enddo

        do jlon=1,nproma
           var(jlon, n1) = loki_temp_0(jlon)
           var(jlon, n2) = loki_temp_1(jlon)
        enddo
        !$loki end split-read-write

    Parameters
    ----------
    dimensions : list
        A list of :any:`Dimension` objects corresponding to all :any:`Loop`s in the
        ``!$loki split-read-write`` region.
    """

    item_filter = (ProcedureItem,)

    def __init__(self, dimensions):
        self.dimensions = as_tuple(dimensions)

    def transform_subroutine(self, routine, **kwargs):
        """
        Split reads and writes in every ``!$loki split-read-write`` region of ``routine``.

        The body of each matching region is rewritten in place to assign to
        temporaries, a copy of the body holding only the write-backs is appended
        to the region, and the temporaries are added to the routine's variables.

        Parameters
        ----------
        routine : :any:`Subroutine`
            The subroutine to transform.
        """

        # cache variable_map for fast lookup later
        variable_map = routine.variable_map
        # index of the last temporary created so far (-1: none yet)
        temp_counter = -1
        tmp_vars = []

        # find split read-write pragmas
        with pragma_regions_attached(routine):
            for region in FindNodes(ir.PragmaRegion).visit(routine.body):
                if not is_loki_pragma(region.pragma, starts_with='split-read-write'):
                    continue

                transformer = SplitReadWriteWalk(self.dimensions, variable_map, count=temp_counter)
                transformer.visit(region.body)

                # `temp_count` is seeded with `temp_counter` and therefore already is
                # the running index; adding it on top would leave gaps in the numbering
                # of temporaries from the third region onwards
                temp_counter = transformer.temp_count
                tmp_vars += transformer.tmp_vars

                if transformer.write_map:
                    new_writes = Transformer(transformer.write_map).visit(region.body)
                    region.append(new_writes)

        # add declarations for temporaries
        if tmp_vars:
            # de-duplicate while keeping creation order, so that the order of the
            # generated declarations is reproducible between runs
            declarations = dict.fromkeys(var.clone(dimensions=var.type.shape) for var in tmp_vars)
            routine.variables += tuple(declarations)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
# (C) Copyright 2018- ECMWF. | ||
# This software is licensed under the terms of the Apache Licence Version 2.0 | ||
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. | ||
# In applying this licence, ECMWF does not waive the privileges and immunities | ||
# granted to it by virtue of its status as an intergovernmental organisation | ||
# nor does it submit to any jurisdiction. | ||
|
||
import pytest | ||
|
||
from loki.expression import FindVariables | ||
from loki import Dimension, Subroutine | ||
from loki.ir import FindNodes, nodes as ir, pragma_regions_attached, is_loki_pragma | ||
from loki.frontend import available_frontends | ||
from loki.transformations import SplitReadWriteTransformation | ||
|
||
|
||
@pytest.fixture(scope='module', name='horizontal')
def fixture_horizontal():
    """
    Module-scoped horizontal :any:`Dimension` with index ``jl`` and size ``nlon``.
    """
    dimension = Dimension(
        name='horizontal',
        size='nlon',
        index='jl',
        bounds=('start', 'end'),
        aliases=('nproma',),
    )
    return dimension
|
||
@pytest.fixture(scope='module', name='vertical')
def fixture_vertical():
    """
    Module-scoped vertical :any:`Dimension` with index ``jk`` and size ``nz``.
    """
    dimension = Dimension(
        name='vertical',
        size='nz',
        index='jk',
        aliases=('nlev',),
    )
    return dimension
|
||
|
||
@pytest.mark.parametrize('frontend', available_frontends())
def test_split_read_write(frontend, horizontal, vertical):
    """
    Check that ``!$loki split-read-write`` regions are split into a read part
    that stores to temporaries and a write part that copies them back.
    """

    fcode = """
subroutine kernel(nlon, nz, start, end, n1, n2, n3, var0, var1, var2, nfre)
   implicit none
   integer, intent(in) :: nlon, nz, n1, n2, n3, start, end, nfre
   real, intent(inout) :: var0(nlon,nfre,6), var1(nlon, nz, 6), var2(nlon,nz)
   integer :: jl, jk, m
   !$loki split-read-write
   do jk = 1,nz
     do jl = start,end
       var1(jl, jk, n1) = var1(jl, jk, n1) + 1.
       var1(jl, jk, n1) = var1(jl, jk, n1) * 2.
       var1(jl, jk, n2) = var1(jl, jk, n2) + var1(jl, jk, n1)
       var2(jl, jk ) = 0.
     end do
   end do
   print *, "a leaf node that shouldn't be copied"
   !$loki end split-read-write
   !.....should be transformed to........
   !!$loki split-read-write
   ! do jk=1,nz
   !   do jl=start,end
   !     loki_temp_0(jl, jk) = var1(jl, jk, n1) + 1.
   !     loki_temp_0(jl, jk) = loki_temp_0(jl, jk)*2.
   !     loki_temp_1(jl, jk) = var1(jl, jk, n2) + loki_temp_0(jl, jk)
   !     var2(jl, jk) = 0.
   !   end do
   ! end do
   ! print *, 'a leaf node that shouldn''t be copied'
   ! do jk=1,nz
   !   do jl=start,end
   !     var1(jl, jk, n1) = loki_temp_0(jl, jk)
   !     var1(jl, jk, n2) = loki_temp_1(jl, jk)
   !   end do
   ! end do
   !!$loki end split-read-write
   do m = 1,nfre
     !$loki split-read-write
     if( m < nfre/2 )then
       do jl = start,end
         var0(jl, m, n3) = var0(jl, m, n3) + 1.
       end do
     endif
     !$loki end split-read-write
     !.....should be transformed to........
     !!$loki split-read-write
     ! if (m < nfre / 2) then
     !   do jl=start,end
     !     loki_temp_2(jl) = var0(jl, m, n3) + 1.
     !   end do
     ! end if
     ! if (m < nfre / 2) then
     !   do jl=start,end
     !     var0(jl, m, n3) = loki_temp_2(jl)
     !   end do
     ! end if
     !!$loki end split-read-write
   end do
end subroutine kernel
"""

    routine = Subroutine.from_source(fcode, frontend=frontend)
    transformation = SplitReadWriteTransformation(dimensions=(horizontal, vertical))
    transformation.apply(routine)

    with pragma_regions_attached(routine):

        regions = FindNodes(ir.PragmaRegion).visit(routine.body)
        assert len(regions) == 2
        first_region, second_region = regions

        #=========== check first pragma region ==============#
        assert is_loki_pragma(first_region.pragma, starts_with='split-read-write')

        # both temporaries of the first region must be declared
        assert 'loki_temp_0(nlon,nz)' in routine.variables
        assert 'loki_temp_1(nlon,nz)' in routine.variables

        # the jk/jl loop nest must appear twice: once for reads, once for writes
        jk_loops = [
            loop for loop in FindNodes(ir.Loop).visit(first_region.body)
            if loop.variable == 'jk'
        ]
        assert len(jk_loops) == 2
        for jk_loop in jk_loops:
            inner_loops = FindNodes(ir.Loop).visit(jk_loop.body)
            assert len(inner_loops) == 1
            assert inner_loops[0].variable == 'jl'
        read_nest, write_nest = jk_loops

        # the plain assignment to var2 only lives in the first copy of the region
        assert 'var2(jl,jk)' in FindVariables().visit(read_nest)
        assert 'var2(jl,jk)' not in FindVariables().visit(write_nest)

        # the print statement only lives in the first copy of the region
        assert len(FindNodes(ir.Intrinsic).visit(first_region)) == 1

        # check correctness of split reads
        reads = FindNodes(ir.Assignment).visit(read_nest.body)
        assert len(reads) == 4
        assert reads[0].lhs == reads[1].lhs
        assert reads[1].rhs == f'{reads[0].lhs}*2.'
        assert reads[2].lhs != reads[0].lhs
        assert reads[2].lhs.dimensions == reads[0].lhs.dimensions
        assert f'{reads[0].lhs}' in reads[2].rhs

        # check correctness of split writes
        writes = FindNodes(ir.Assignment).visit(write_nest.body)
        assert len(writes) == 2
        assert writes[0].lhs == 'var1(jl, jk, n1)'
        assert writes[1].lhs == 'var1(jl, jk, n2)'
        assert writes[0].rhs == reads[0].lhs
        assert writes[1].rhs == reads[2].lhs

        #=========== check second pragma region ==============#
        assert is_loki_pragma(second_region.pragma, starts_with='split-read-write')

        # the conditional must be duplicated as well
        conditionals = FindNodes(ir.Conditional).visit(second_region.body)
        assert len(conditionals) == 2

        # the temporary of the second region continues the numbering
        assert 'loki_temp_2(nlon)' in routine.variables

        # check correctness of split reads
        reads = FindNodes(ir.Assignment).visit(conditionals[0])
        assert len(reads) == 1
        assert reads[0].lhs == 'loki_temp_2(jl)'
        assert 'var0(jl, m, n3)' in reads[0].rhs

        # check correctness of split writes
        writes = FindNodes(ir.Assignment).visit(conditionals[1])
        assert len(writes) == 1
        assert writes[0].lhs == 'var0(jl, m, n3)'
        assert writes[0].rhs == 'loki_temp_2(jl)'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters