Skip to content

Commit

Permalink
SplitReadWriteTrafo: first implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
awnawab committed Jun 13, 2024
1 parent d5a8e6c commit 4974f6b
Show file tree
Hide file tree
Showing 3 changed files with 274 additions and 0 deletions.
1 change: 1 addition & 0 deletions loki/transformations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,4 @@
from loki.transformations.pool_allocator import * # noqa
from loki.transformations.utilities import * # noqa
from loki.transformations.block_index_transformations import * # noqa
from loki.transformations.split_read_write import * # noqa
143 changes: 143 additions & 0 deletions loki/transformations/split_read_write.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
# (C) Copyright 2018- ECMWF.
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.


from loki.batch import Transformation, ProcedureItem
from loki.expression import FindVariables, Array, SubstituteExpressions
from loki.tools import as_tuple, flatten
from loki.ir import (
pragma_regions_attached, is_loki_pragma, nodes as ir, FindNodes,
Transformer, FindScopes
)

__all__ = ['SplitReadWriteTransformation']

class SplitReadWriteTransformation(Transformation):
    """
    Split read-modify-write array assignments into separate read and write passes.

    When accumulating values to multiple components of an array, a compiler cannot rule out
    the possibility that the indices alias the same address. Consider for example the following
    code:

    .. code-block:: fortran

        !$loki split read-write
        do jlon=1,nproma
            var(jlon, n1) = var(jlon, n1) + 1.
            var(jlon, n2) = var(jlon, n2) + 1.
        enddo
        !$loki end split read-write

    In the above example, there is no guarantee that ``n1`` and ``n2`` do not in fact point to the same location.
    Therefore the load and store instructions for ``var`` have to be executed in order.

    For cases where the user knows ``n1`` and ``n2`` indeed represent distinct locations, this transformation
    provides a pragma assisted mechanism to split the reads and writes, and therefore make the loads independent
    from the stores. The above code would therefore be transformed to:

    .. code-block:: fortran

        !$loki split read-write
        do jlon=1,nproma
            loki_temp_0(jlon) = var(jlon, n1) + 1.
            loki_temp_1(jlon) = var(jlon, n2) + 1.
        enddo

        do jlon=1,nproma
            var(jlon, n1) = loki_temp_0(jlon)
            var(jlon, n2) = loki_temp_1(jlon)
        enddo
        !$loki end split read-write

    Parameters
    ----------
    dimensions : iterable
        The dimension descriptors used to size and index the temporaries. For each entry
        the attributes ``index``, ``size`` and ``size_expressions`` are read.
    """

    item_filter = (ProcedureItem,)

    def __init__(self, dimensions):
        self.dimensions = dimensions

    def _enclosing_loop_dimensions(self, assign, region_body):
        """
        Return the configured dimension of every loop that encloses ``assign``
        inside ``region_body``, in the order the scopes are reported.
        """
        loop_dims = []
        for scope in flatten(FindScopes(assign).visit(region_body)):
            if not isinstance(scope, ir.Loop):
                continue
            matches = [dim for dim in self.dimensions if scope.variable.name.lower() == dim.index.lower()]
            assert matches, f'No dimension configured for loop variable {scope.variable.name}'
            loop_dims.append(matches[0])
        return loop_dims

    def _build_temporary(self, lhs, loop_dims, variable_map, index):
        """
        Create the temporary ``loki_temp_<index>`` that buffers the value assigned to ``lhs``.

        Only those entries of the shape of ``lhs`` that belong to a dimension in
        ``loop_dims`` are carried over to the temporary; all others are dropped.
        """
        shape = []
        subscripts = []
        for extent in lhs.type.shape:
            matches = [dim for dim in self.dimensions if extent in dim.size_expressions]
            if matches and matches[0] in loop_dims:
                shape.append(variable_map[matches[0].size])
                subscripts.append(variable_map[matches[0].index])

        # the temporary is a local variable, so any argument intent is removed
        temp_type = lhs.type.clone(shape=as_tuple(shape), intent=None)
        return lhs.clone(name=f'loki_temp_{index}', dimensions=as_tuple(subscripts), type=temp_type)

    def transform_subroutine(self, routine, **kwargs):
        """
        Apply the read/write split to every ``!$loki split read-write`` pragma region of ``routine``.

        Parameters
        ----------
        routine
            The routine to transform; its ``body`` and ``variables`` are updated in place.
        **kwargs
            Accepted but not used by this transformation.
        """

        # initialise working vars, lists and maps
        temp_vars = []
        region_map = {}
        temp_counter = 0

        # cache variable_map for fast lookup later
        variable_map = routine.variable_map

        # find split read-write pragmas
        with pragma_regions_attached(routine):
            for region in FindNodes(ir.PragmaRegion).visit(routine.body):
                if not is_loki_pragma(region.pragma, starts_with='split read-write'):
                    continue

                # keep only assignments to arrays that also read their own left-hand side
                assigns = [
                    a for a in FindNodes(ir.Assignment).visit(region.body)
                    if isinstance(a.lhs, Array) and a.lhs in FindVariables().visit(a.rhs)
                ]

                # the second copy of the region starts out with every leaf node deleted;
                # the split writes are added back below
                assign_read_map = {}
                assign_write_map = {leaf: None for leaf in FindNodes(ir.LeafNode).visit(region.body)}

                # Number the temporaries in order of first appearance. De-duplicating via a dict
                # instead of a set keeps the numbering reproducible, since set iteration order
                # is not guaranteed.
                lhs_var_map = {}
                lhs_vars = tuple(dict.fromkeys(a.lhs for a in assigns))
                temp_counter_map = {var: temp_counter + count for count, var in enumerate(lhs_vars)}
                temp_counter += len(temp_counter_map)

                for assign in assigns:

                    # determine all ancestor loops of assignment
                    loop_dims = self._enclosing_loop_dimensions(assign, region.body)

                    # define var to store temporary assignment
                    temp = self._build_temporary(
                        assign.lhs, loop_dims, variable_map, temp_counter_map[assign.lhs]
                    )
                    temp_vars.append(temp)

                    # Substitute *before* registering the current left-hand side: the first read
                    # of a variable must still load the original array, later reads use the temporary.
                    rhs = SubstituteExpressions(lhs_var_map).visit(assign.rhs)
                    if assign.lhs not in lhs_var_map:
                        lhs_var_map[assign.lhs] = temp

                        # only the first assignment to a variable yields a write-back
                        assign_write_map[assign] = as_tuple(ir.Assignment(lhs=assign.lhs, rhs=temp))

                    assign_read_map[assign] = as_tuple(ir.Assignment(lhs=temp, rhs=rhs))

                # create two copies of the pragma region, the second containing
                # only the newly split writes
                new_reads = Transformer(assign_read_map).visit(region.body)
                new_writes = Transformer(assign_write_map).visit(region.body)
                region_map[region.body] = (new_reads, new_writes)

            # add declarations for temporaries, de-duplicated in order of creation
            if temp_vars:
                declarations = tuple(dict.fromkeys(var.clone(dimensions=var.type.shape) for var in temp_vars))
                routine.variables += declarations

            # NOTE(review): applied while the pragma regions are still attached, because the
            # region bodies serve as mapper keys -- confirm against the original indentation
            routine.body = Transformer(region_map).visit(routine.body)
130 changes: 130 additions & 0 deletions loki/transformations/tests/test_split_read_write.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# (C) Copyright 2018- ECMWF.
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.

import pytest

from loki.expression import FindVariables
from loki import Dimension, Subroutine
from loki.ir import FindNodes, nodes as ir, pragma_regions_attached, is_loki_pragma
from loki.frontend import available_frontends
from loki.transformations import SplitReadWriteTransformation


@pytest.fixture(scope='module', name='horizontal')
def fixture_horizontal():
    """Module-scoped fixture providing the ``horizontal`` dimension (index ``jl``, size ``nlon``)."""
    horizontal = Dimension(
        name='horizontal',
        size='nlon',
        index='jl',
        bounds=('start', 'end'),
        aliases=('nproma',),
    )
    return horizontal

@pytest.fixture(scope='module', name='vertical')
def fixture_vertical():
    """Module-scoped fixture providing the ``vertical`` dimension (index ``jk``, size ``nz``)."""
    vertical = Dimension(
        name='vertical',
        size='nz',
        index='jk',
        aliases=('nlev',),
    )
    return vertical


@pytest.mark.parametrize('frontend', available_frontends())
def test_split_read_write(frontend, horizontal, vertical):
    """
    Test pragma-assisted splitting of reads and writes.

    The kernel contains two ``split read-write`` regions: a doubly nested loop with a
    repeated and a dependent read-write assignment plus a plain assignment, and a
    region that wraps a conditional inside an outer loop.
    """

    fcode = """
subroutine kernel(nlon, nz, start, end, n1, n2, n3, var0, var1, var2, nfre)
implicit none
integer, intent(in) :: nlon, nz, n1, n2, n3, start, end, nfre
real, intent(inout) :: var0(nlon,nfre,6), var1(nlon, nz, 6), var2(nlon,nz)
integer :: jl, jk, m
!$loki split read-write
do jk = 1,nz
do jl = start,end
var1(jl, jk, n1) = var1(jl, jk, n1) + 1.
var1(jl, jk, n1) = var1(jl, jk, n1) * 2.
var1(jl, jk, n2) = var1(jl, jk, n2) + var1(jl, jk, n1)
var2(jl, jk ) = 0.
end do
end do
!$loki end split read-write
do m = 1,nfre
!$loki split read-write
if( m < nfre/2 )then
do jl = start,end
var0(jl, m, n3) = var0(jl, m, n3) + 1.
end do
endif
!$loki end split read-write
end do
end subroutine kernel
"""

    routine = Subroutine.from_source(fcode, frontend=frontend)
    SplitReadWriteTransformation(dimensions=(horizontal, vertical)).apply(routine)

    with pragma_regions_attached(routine):

        pragma_regions = FindNodes(ir.PragmaRegion).visit(routine.body)
        assert len(pragma_regions) == 2

        #=========== check first pragma region ==============#
        region = pragma_regions[0]
        assert is_loki_pragma(region.pragma, starts_with='split read-write')

        # check that temporaries were declared
        assert 'loki_temp_0(nlon,nz)' in routine.variables
        assert 'loki_temp_1(nlon,nz)' in routine.variables

        # check correctly nested loops: the loop nest must now exist twice
        outer_loops = [loop for loop in FindNodes(ir.Loop).visit(region.body) if loop.variable == 'jk']
        assert len(outer_loops) == 2
        for outer in outer_loops:
            inner_loops = FindNodes(ir.Loop).visit(outer.body)
            assert len(inner_loops) == 1
            assert inner_loops[0].variable == 'jl'

        # check simple assignment is only in first copy of region
        assert 'var2(jl,jk)' in FindVariables().visit(outer_loops[0])
        assert 'var2(jl,jk)' not in FindVariables().visit(outer_loops[1])

        # check correctness of split reads
        assigns = FindNodes(ir.Assignment).visit(outer_loops[0].body)
        assert len(assigns) == 4
        # both assignments to var1(jl, jk, n1) go to the same temporary ...
        assert assigns[0].lhs == assigns[1].lhs
        # ... and the second one reads that temporary instead of the array
        assert assigns[1].rhs == f'{assigns[0].lhs}*2.'
        assert assigns[2].lhs != assigns[0].lhs
        assert assigns[2].lhs.dimensions == assigns[0].lhs.dimensions
        assert f'{assigns[0].lhs}' in assigns[2].rhs

        # check correctness of split writes: one write-back per distinct variable
        _assigns = FindNodes(ir.Assignment).visit(outer_loops[1].body)
        assert len(_assigns) == 2
        assert _assigns[0].lhs == 'var1(jl, jk, n1)'
        assert _assigns[1].lhs == 'var1(jl, jk, n2)'
        assert _assigns[0].rhs == assigns[0].lhs
        assert _assigns[1].rhs == assigns[2].lhs


        #=========== check second pragma region ==============#
        region = pragma_regions[1]
        assert is_loki_pragma(region.pragma, starts_with='split read-write')

        # the conditional wrapping the loop must be duplicated as well
        conds = FindNodes(ir.Conditional).visit(region.body)
        assert len(conds) == 2

        # check that temporaries were declared
        assert 'loki_temp_2(nlon)' in routine.variables

        # check correctness of split reads
        assigns = FindNodes(ir.Assignment).visit(conds[0])
        assert len(assigns) == 1
        assert assigns[0].lhs == 'loki_temp_2(jl)'
        assert 'var0(jl, m, n3)' in assigns[0].rhs

        # check correctness of split writes
        assigns = FindNodes(ir.Assignment).visit(conds[1])
        assert len(assigns) == 1
        assert assigns[0].lhs == 'var0(jl, m, n3)'
        assert assigns[0].rhs == 'loki_temp_2(jl)'

0 comments on commit 4974f6b

Please sign in to comment.