Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Skip privatization of arrays with existing data declarations #389

Merged
merged 2 commits into from
Oct 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 56 additions & 8 deletions loki/transformations/single_column/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.

from collections import defaultdict
from loki.batch import Transformation
from loki.expression import symbols as sym, is_dimension_constant
from loki.ir import (
nodes as ir, FindNodes, FindVariables, Transformer,
pragmas_attached, is_loki_pragma, get_pragma_parameters
pragmas_attached, is_loki_pragma, get_pragma_parameters,
pragma_regions_attached
)
from loki.logging import info
from loki.tools import as_tuple, flatten
Expand Down Expand Up @@ -192,11 +194,55 @@ def transform_subroutine(self, routine, **kwargs):
# Mark all non-parallel loops as `!$acc loop seq`
self.annotate_sequential_loops(routine)

with pragmas_attached(routine, ir.Loop, attach_pragma_post=True):
driver_loops = find_driver_loops(routine=routine, targets=targets)
for loop in driver_loops:
self.annotate_driver_loop(loop)
with pragma_regions_attached(routine):
with pragmas_attached(routine, ir.Loop, attach_pragma_post=True):
# Find variables with existing OpenACC data declarations
acc_vars = self.find_acc_vars(routine, targets)

driver_loops = find_driver_loops(section=routine.body, targets=targets)
for loop in driver_loops:
self.annotate_driver_loop(loop, acc_vars.get(loop, []))

def find_acc_vars(self, routine, targets):
    """
    Find variables already specified in OpenACC data clauses.

    Every ``!$acc data`` pragma region in the routine body is inspected.
    For each driver loop found inside such a region, the lower-case names
    of the variables covered by the region's data clauses are recorded.

    Parameters
    ----------
    routine : :any:`Subroutine`
        Subroutine whose body is searched for ``!$acc data`` regions.
    targets : list or string
        List of subroutines that are to be considered as part of
        the transformation call tree.

    Returns
    -------
    defaultdict
        Maps each driver loop to a list of lower-case variable names:
        the names listed in ``present``, ``copy``, ``copyin``, ``copyout``
        and ``deviceptr`` clauses and, if the region has a ``default``
        clause other than ``none``, the names of all variables used in
        the loop. Loops outside any ``!$acc data`` region have no entry.
    """

    data_clauses = ('present', 'copy', 'copyin', 'copyout', 'deviceptr')
    acc_vars = defaultdict(list)

    for region in FindNodes(ir.PragmaRegion).visit(routine.body):
        if region.pragma.keyword.lower() != 'acc':
            continue

        parameters = get_pragma_parameters(
            region.pragma, starts_with='data', only_loki_pragmas=False
        )
        if not parameters:
            continue

        driver_loops = find_driver_loops(section=region.body, targets=targets)
        if not driver_loops:
            continue

        # Names listed explicitly in data clauses. These are collected
        # regardless of any ``default`` clause, so that e.g.
        # ``default(none) copyin(x)`` still reports ``x``.
        # NOTE(review): clause values are assumed to be comma-separated
        # strings; a list of such strings is tolerated in case a repeated
        # clause is returned that way -- confirm against
        # ``get_pragma_parameters``.
        declared = []
        for category in data_clauses:
            values = parameters.get(category)
            if isinstance(values, str):
                values = (values,)
            for value in values or ():
                if not isinstance(value, str):
                    continue
                # Drop empty entries so no '' names end up in the result
                declared += [
                    name for name in (p.strip().lower() for p in value.split(','))
                    if name
                ]

        # A ``default`` clause other than ``none`` implicitly covers
        # every variable used inside the driver loop
        default = parameters.get('default', None)
        implicit = bool(default) and 'none' not in [
            p.strip().lower() for p in default.split(',')
        ]

        for loop in driver_loops:
            acc_vars[loop] += declared
            if implicit:
                acc_vars[loop] += [
                    var.name.lower() for var in FindVariables(unique=True).visit(loop)
                ]

    return acc_vars

@classmethod
def device_alloc_column_locals(cls, routine, column_locals):
Expand All @@ -219,14 +265,16 @@ def device_alloc_column_locals(cls, routine, column_locals):
routine.body.prepend((ir.Comment(''), pragma, ir.Comment('')))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[no action required] This is not covered in tests, so we don't seem to be testing this.
(@mlange05 for info)

routine.body.append((ir.Comment(''), pragma_post, ir.Comment('')))

def annotate_driver_loop(self, loop):
def annotate_driver_loop(self, loop, acc_vars):
"""
Annotate driver block loop with ``'openacc'`` pragmas.

Parameters
----------
loop : :any:`Loop`
Driver :any:`Loop` to wrap in ``'opencc'`` pragmas.
Driver :any:`Loop` to wrap in ``'openacc'`` pragmas.
acc_vars : list
Variables already declared in ``'openacc'`` data directives.
"""

# Mark driver loop as "gang parallel".
Expand All @@ -239,7 +287,7 @@ def annotate_driver_loop(self, loop):
# Filter out arrays that are explicitly allocated with block dimension
sizes = self.block_dim.size_expressions
arrays = [v for v in arrays if not any(d in sizes for d in as_tuple(v.shape))]
private_arrays = ', '.join(set(v.name for v in arrays))
private_arrays = ', '.join(set(v.name for v in arrays if not v.name_parts[0].lower() in acc_vars))
private_clause = '' if not private_arrays else f' private({private_arrays})'

for pragma in as_tuple(loop.pragma):
Expand Down
29 changes: 20 additions & 9 deletions loki/transformations/single_column/tests/test_scc.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from loki import Subroutine, Sourcefile, Dimension, fgen
from loki.batch import ProcedureItem
from loki.expression import Scalar, Array, IntLiteral, RangeIndex
from loki.expression import Scalar, Array, IntLiteral
from loki.frontend import available_frontends, OMNI, OFP
from loki.ir import (
FindNodes, Assignment, CallStatement, Conditional, Loop,
Expand Down Expand Up @@ -236,32 +236,40 @@ def test_scc_demote_transformation(frontend, horizontal):


@pytest.mark.parametrize('frontend', available_frontends())
def test_scc_annotate_openacc(frontend, horizontal, blocking):
@pytest.mark.parametrize('acc_data', ['default', 'copyin', None])
def test_scc_annotate_openacc(frontend, horizontal, blocking, acc_data):
"""
Test the correct addition of OpenACC pragmas to SCC format code (no hoisting).
"""

fcode_driver = """
fcode_driver = f"""
SUBROUTINE column_driver(nlon, nproma, nlev, nz, q, nb)
INTEGER, INTENT(IN) :: nlon, nz, nb ! Size of the horizontal and vertical
INTEGER, INTENT(IN) :: nproma, nlev ! Aliases of horizontal and vertical sizes
REAL, INTENT(INOUT) :: q(nlon,nz,nb)
REAL :: other_var(nlon)
INTEGER :: b, start, end

start = 1
end = nlon
{'!$acc data default(present)' if acc_data == 'default' else ''}
{'!$acc data copyin(other_var)' if acc_data == 'copyin' else ''}
!
do b=1, nb
call compute_column(start, end, nlon, nproma, nz, q(:,:,b))
call compute_column(start, end, nlon, nproma, nz, q(:,:,b), other_var)
end do
!
{'!$acc end data' if acc_data else ''}
END SUBROUTINE column_driver
"""

fcode_kernel = """
SUBROUTINE compute_column(start, end, nlon, nproma, nlev, nz, q)
SUBROUTINE compute_column(start, end, nlon, nproma, nlev, nz, q, other_var)
INTEGER, INTENT(IN) :: start, end ! Iteration indices
INTEGER, INTENT(IN) :: nlon, nz ! Size of the horizontal and vertical
INTEGER, INTENT(IN) :: nproma, nlev ! Aliases of horizontal and vertical sizes
REAL, INTENT(INOUT) :: q(nlon,nz)
REAL, INTENT(IN) :: other_var
REAL :: t(nlon,nz)
REAL :: a(nlon)
REAL :: d(nproma)
Expand Down Expand Up @@ -326,8 +334,11 @@ def test_scc_annotate_openacc(frontend, horizontal, blocking):
with pragmas_attached(driver, Loop):
driver_loops = FindNodes(Loop).visit(driver.body)
assert len(driver_loops) == 1
assert driver_loops[0].pragma[0].keyword == 'acc'
assert driver_loops[0].pragma[0].content == 'parallel loop gang vector_length(nlon)'
assert driver_loops[0].pragma[0].keyword.lower() == 'acc'
if acc_data:
assert driver_loops[0].pragma[0].content == 'parallel loop gang vector_length(nlon)'
else:
assert driver_loops[0].pragma[0].content == 'parallel loop gang private(other_var) vector_length(nlon)'


@pytest.mark.parametrize('frontend', available_frontends())
Expand Down Expand Up @@ -750,7 +761,7 @@ def test_scc_multiple_acc_pragmas(frontend, horizontal, blocking):


@pytest.mark.parametrize('frontend', available_frontends())
def test_scc_annotate_routine_seq_pragma(frontend, horizontal, blocking):
def test_scc_annotate_routine_seq_pragma(frontend, blocking):
"""
Test that `!$loki routine seq` pragmas are replaced correctly by
`!$acc routine seq` pragmas.
Expand Down Expand Up @@ -790,7 +801,7 @@ def test_scc_annotate_routine_seq_pragma(frontend, horizontal, blocking):


@pytest.mark.parametrize('frontend', available_frontends())
def test_scc_annotate_empty_data_clause(frontend, horizontal, blocking):
def test_scc_annotate_empty_data_clause(frontend, blocking):
"""
Test that we do not generate empty `!$acc data` clauses.
"""
Expand Down
4 changes: 2 additions & 2 deletions loki/transformations/single_column/vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ def process_driver(self, routine, targets=()):
"""

with pragmas_attached(routine, ir.Loop, attach_pragma_post=True):
driver_loops = find_driver_loops(routine=routine, targets=targets)
driver_loops = find_driver_loops(section=routine.body, targets=targets)

# remove vector loops
driver_loop_map = {}
Expand Down Expand Up @@ -435,7 +435,7 @@ def transform_subroutine(self, routine, **kwargs):

if role == 'driver':
with pragmas_attached(routine, ir.Loop):
driver_loops = find_driver_loops(routine=routine, targets=targets)
driver_loops = find_driver_loops(section=routine.body, targets=targets)

for loop in driver_loops:
# Revector all marked sections within the driver loop body
Expand Down
16 changes: 8 additions & 8 deletions loki/transformations/tests/test_loop_blocking.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def test_1d_splitting(tmp_path, frontend, block_size, n):
num_loops = len(loops)
num_vars = len(routine.variable_map)
with pragmas_attached(routine, Loop):
loops = find_driver_loops(routine,
loops = find_driver_loops(routine.body,
targets=None)
splitting_vars, inner_loop, outer_loop = split_loop(routine, loops[0], block_size)
loops = FindNodes(ir.Loop).visit(routine.ir)
Expand Down Expand Up @@ -94,7 +94,7 @@ def test_1d_splitting_multi_var(tmp_path, frontend, block_size, n):
num_loops = len(loops)
num_vars = len(routine.variable_map)
with pragmas_attached(routine, Loop):
loops = find_driver_loops(routine,
loops = find_driver_loops(routine.body,
targets=None)
splitting_vars, inner_loop, outer_loop = split_loop(routine, loops[0], block_size)
loops = FindNodes(ir.Loop).visit(routine.ir)
Expand Down Expand Up @@ -142,7 +142,7 @@ def test_2d_splitting(tmp_path, frontend, block_size, n):
num_loops = len(loops)
num_vars = len(routine.variable_map)
with pragmas_attached(routine, Loop):
loops = find_driver_loops(routine,
loops = find_driver_loops(routine.body,
targets=None)
splitting_vars, inner_loop, outer_loop = split_loop(routine, loops[0], block_size)
loops = FindNodes(ir.Loop).visit(routine.ir)
Expand Down Expand Up @@ -192,7 +192,7 @@ def test_3d_splitting(tmp_path, frontend, block_size, n):
num_loops = len(loops)
num_vars = len(routine.variable_map)
with pragmas_attached(routine, Loop):
loops = find_driver_loops(routine,
loops = find_driver_loops(routine.body,
targets=None)
splitting_vars, inner_loop, outer_loop = split_loop(routine, loops[0], block_size)
loops = FindNodes(ir.Loop).visit(routine.ir)
Expand Down Expand Up @@ -250,7 +250,7 @@ def test_1d_blocking(tmp_path, frontend, block_size, n):
routine = Subroutine.from_source(fcode, frontend=frontend)
loops = FindNodes(ir.Loop).visit(routine.ir)
with pragmas_attached(routine, Loop):
loops = find_driver_loops(routine,
loops = find_driver_loops(routine.body,
targets=None)

num_loops = len(loops)
Expand Down Expand Up @@ -309,7 +309,7 @@ def test_1d_blocking_multi_intent(tmp_path, frontend, block_size, n):
routine = Subroutine.from_source(fcode, frontend=frontend)
loops = FindNodes(ir.Loop).visit(routine.ir)
with pragmas_attached(routine, Loop):
loops = find_driver_loops(routine,
loops = find_driver_loops(routine.body,
targets=None)

num_loops = len(loops)
Expand Down Expand Up @@ -372,7 +372,7 @@ def test_2d_blocking(tmp_path, frontend, block_size, n):
num_loops = len(loops)
num_vars = len(routine.variable_map)
with pragmas_attached(routine, Loop):
loops = find_driver_loops(routine,
loops = find_driver_loops(routine.body,
targets=None)
splitting_vars, inner_loop, outer_loop = split_loop(routine, loops[0], block_size)
loops = FindNodes(ir.Loop).visit(routine.ir)
Expand Down Expand Up @@ -432,7 +432,7 @@ def test_3d_blocking(tmp_path, frontend, block_size, n):
num_loops = len(loops)
num_vars = len(routine.variable_map)
with pragmas_attached(routine, Loop):
loops = find_driver_loops(routine,
loops = find_driver_loops(routine.body,
targets=None)
splitting_vars, inner_loop, outer_loop = split_loop(routine, loops[0], block_size)
loops = FindNodes(ir.Loop).visit(routine.ir)
Expand Down
8 changes: 4 additions & 4 deletions loki/transformations/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -595,16 +595,16 @@ def is_driver_loop(loop, targets):
return False


def find_driver_loops(routine, targets):
def find_driver_loops(section, targets):
"""
Find and return all driver loops of a given `routine`.
Find and return all driver loops in a given `section`.

A *driver loop* is specified either by a call to a routine within
`targets` or by the pragma `!$loki driver-loop`.

Parameters
----------
routine : :any:`Subroutine`
section : :any:`Section` or tuple
The section (or tuple of IR nodes) in which to find the driver loops.
targets : list or string
List of subroutines that are to be considered as part of
Expand All @@ -613,7 +613,7 @@ def find_driver_loops(routine, targets):

driver_loops = []
nested_driver_loops = []
for loop in FindNodes(ir.Loop).visit(routine.body):
for loop in FindNodes(ir.Loop).visit(section):
if loop in nested_driver_loops:
continue

Expand Down
Loading