From 54d28d26801c5a01e0d1f89f598cb8932ac902eb Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Wed, 2 Aug 2023 15:01:49 +0100 Subject: [PATCH 01/23] Remove typedefs from sourcefile definitions --- loki/sourcefile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loki/sourcefile.py b/loki/sourcefile.py index 9d46eabea..fbb4e86a0 100644 --- a/loki/sourcefile.py +++ b/loki/sourcefile.py @@ -448,9 +448,9 @@ def all_subroutines(self): @property def definitions(self): """ - List of all definitions made in this sourcefile, i.e. modules, subroutines and types + List of all definitions made in this sourcefile, i.e. modules and subroutines """ - return self.modules + self.subroutines + self.typedefs + return self.modules + self.subroutines def __getitem__(self, name): name = name.lower() From ac2f2f8908ec07f74e54d9c4e5ef2944f1b84204 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Wed, 2 Aug 2023 15:11:17 +0100 Subject: [PATCH 02/23] Store parser_classes on program units and sourcefiles --- loki/frontend/regex.py | 6 ++++-- loki/module.py | 10 ++++++---- loki/program_unit.py | 15 ++++++++++++--- loki/sourcefile.py | 17 ++++++++++++++--- loki/subroutine.py | 13 ++++++++----- tests/test_frontends.py | 37 +++++++++++++++++++++++++++++++++++++ 6 files changed, 81 insertions(+), 17 deletions(-) diff --git a/loki/frontend/regex.py b/loki/frontend/regex.py index 9a1f07088..c7220cfd6 100644 --- a/loki/frontend/regex.py +++ b/loki/frontend/regex.py @@ -447,7 +447,8 @@ def match(self, reader, parser_classes, scope): contains = None module.__initialize__( # pylint: disable=unnecessary-dunder-call - name=module.name, spec=spec, contains=contains, source=module.source, incomplete=True + name=module.name, spec=spec, contains=contains, source=module.source, incomplete=True, + parser_classes=parser_classes ) if match.span()[0] > 0: @@ -538,7 +539,8 @@ def match(self, reader, parser_classes, scope): routine.__initialize__( # pylint: 
disable=unnecessary-dunder-call name=routine.name, args=routine._dummies, is_function=routine.is_function, - prefix=prefix, spec=spec, contains=contains, source=routine.source, incomplete=True + prefix=prefix, spec=spec, contains=contains, source=routine.source, + incomplete=True, parser_classes=parser_classes ) if match.span()[0] > 0: diff --git a/loki/module.py b/loki/module.py index 98b49d59a..96d597bc8 100644 --- a/loki/module.py +++ b/loki/module.py @@ -67,13 +67,15 @@ class Module(ProgramUnit): Mark the object as incomplete, i.e. only partially parsed. This is typically the case when it was instantiated using the :any:`Frontend.REGEX` frontend and a full parse using one of the other frontends is pending. + parser_classes : :any:`RegexParserClass`, optional + Provide the list of parser classes used during incomplete regex parsing """ def __init__( self, name=None, docstring=None, spec=None, contains=None, default_access_spec=None, public_access_spec=None, private_access_spec=None, ast=None, source=None, parent=None, symbol_attrs=None, rescope_symbols=False, - incomplete=False + incomplete=False, parser_classes=None ): super().__init__(parent=parent) @@ -84,12 +86,12 @@ def __init__( name=name, docstring=docstring, spec=spec, contains=contains, default_access_spec=default_access_spec, public_access_spec=public_access_spec, private_access_spec=private_access_spec, ast=ast, source=source, - rescope_symbols=rescope_symbols, incomplete=incomplete + rescope_symbols=rescope_symbols, incomplete=incomplete, parser_classes=parser_classes ) def __initialize__( self, name=None, docstring=None, spec=None, contains=None, - ast=None, source=None, rescope_symbols=False, incomplete=False, + ast=None, source=None, rescope_symbols=False, incomplete=False, parser_classes=None, default_access_spec=None, public_access_spec=None, private_access_spec=None ): # Apply dimension pragma annotations to declarations @@ -110,7 +112,7 @@ def __initialize__( super().__initialize__( name=name, 
docstring=docstring, spec=spec, contains=contains, ast=ast, - source=source, rescope_symbols=rescope_symbols, incomplete=incomplete + source=source, rescope_symbols=rescope_symbols, incomplete=incomplete, parser_classes=parser_classes ) @classmethod diff --git a/loki/program_unit.py b/loki/program_unit.py index 11877e8ce..dada17134 100644 --- a/loki/program_unit.py +++ b/loki/program_unit.py @@ -8,7 +8,10 @@ from abc import abstractmethod from loki import ir -from loki.frontend import Frontend, parse_omni_source, parse_ofp_source, parse_fparser_source +from loki.frontend import ( + Frontend, parse_omni_source, parse_ofp_source, parse_fparser_source, + RegexParserClass +) from loki.scope import Scope from loki.tools import CaseInsensitiveDict, as_tuple, flatten from loki.types import ProcedureType @@ -52,16 +55,20 @@ class ProgramUnit(Scope): Mark the object as incomplete, i.e. only partially parsed. This is typically the case when it was instantiated using the :any:`Frontend.REGEX` frontend and a full parse using one of the other frontends is pending. 
+ parser_classes : :any:`RegexParserClass`, optional + Provide the list of parser classes used during incomplete regex parsing """ def __initialize__(self, name, docstring=None, spec=None, contains=None, - ast=None, source=None, rescope_symbols=False, incomplete=False): + ast=None, source=None, rescope_symbols=False, incomplete=False, + parser_classes=None): # Common properties assert name and isinstance(name, str) self.name = name self._ast = ast self._source = source self._incomplete = incomplete + self._parser_classes = parser_classes # Bring arguments into shape if spec is not None and not isinstance(spec, ir.Section): @@ -235,7 +242,9 @@ def make_complete(self, **frontend_args): frontend = frontend_args.pop('frontend', Frontend.FP) definitions = frontend_args.get('definitions') xmods = frontend_args.get('xmods') - parser_classes = frontend_args.get('parser_classes') + parser_classes = frontend_args.get('parser_classes', RegexParserClass.AllClasses) + if frontend == Frontend.REGEX and self._parser_classes: + parser_classes = parser_classes | self._parser_classes # If this object does not have a parent, we create a temporary parent scope # and make sure the node exists in the parent scope. This way, the existing diff --git a/loki/sourcefile.py b/loki/sourcefile.py index fbb4e86a0..9f60559c3 100644 --- a/loki/sourcefile.py +++ b/loki/sourcefile.py @@ -17,7 +17,8 @@ from loki.frontend import ( OMNI, OFP, FP, REGEX, sanitize_input, Source, read_file, preprocess_cpp, parse_omni_source, parse_ofp_source, parse_fparser_source, - parse_omni_ast, parse_ofp_ast, parse_fparser_ast, parse_regex_source + parse_omni_ast, parse_ofp_ast, parse_fparser_ast, parse_regex_source, + RegexParserClass ) from loki.ir import Section, RawSource, Comment, PreprocessorDirective @@ -54,9 +55,11 @@ class Sourcefile: Mark the object as incomplete, i.e. only partially parsed. 
This is typically the case when it was instantiated using the :any:`Frontend.REGEX` frontend and a full parse using one of the other frontends is pending. + parser_classes : :any:`RegexParserClass`, optional + Provide the list of parser classes used during incomplete regex parsing """ - def __init__(self, path, ir=None, ast=None, source=None, incomplete=False): + def __init__(self, path, ir=None, ast=None, source=None, incomplete=False, parser_classes=None): self.path = Path(path) if path is not None else path if ir is not None and not isinstance(ir, Section): ir = Section(body=ir) @@ -64,6 +67,7 @@ def __init__(self, path, ir=None, ast=None, source=None, incomplete=False): self._ast = ast self._source = source self._incomplete = incomplete + self._parser_classes = parser_classes @classmethod def from_file(cls, filename, definitions=None, preprocess=False, @@ -275,10 +279,12 @@ def from_regex(cls, raw_source, filepath, parser_classes=None): """ source, _ = sanitize_input(source=raw_source, frontend=REGEX) + if parser_classes is None: + parser_classes = RegexParserClass.AllClasses ir = parse_regex_source(source, parser_classes=parser_classes) lines = (1, raw_source.count('\n') + 1) source = Source(lines, string=raw_source, file=filepath) - return cls(path=filepath, ir=ir, source=source, incomplete=True) + return cls(path=filepath, ir=ir, source=source, incomplete=True, parser_classes=parser_classes) @classmethod def from_source(cls, source, xmods=None, definitions=None, parser_classes=None, frontend=FP): @@ -396,6 +402,11 @@ def make_complete(self, **frontend_args): self.ir._update(body=as_tuple(body)) self._incomplete = frontend == REGEX + if frontend == REGEX: + parser_classes = frontend_args.get('parser_classes', RegexParserClass.AllClasses) + if self._parser_classes: + parser_classes = self._parser_classes | parser_classes + self._parser_classes = parser_classes @property def source(self): diff --git a/loki/subroutine.py b/loki/subroutine.py index 
a739052d5..d9b9ea8ff 100644 --- a/loki/subroutine.py +++ b/loki/subroutine.py @@ -69,13 +69,15 @@ class Subroutine(ProgramUnit): Mark the object as incomplete, i.e. only partially parsed. This is typically the case when it was instantiated using the :any:`Frontend.REGEX` frontend and a full parse using one of the other frontends is pending. + parser_classes : :any:`RegexParserClass`, optional + Provide the list of parser classes used during incomplete regex parsing """ def __init__( self, name, args=None, docstring=None, spec=None, body=None, contains=None, prefix=None, bind=None, result_name=None, is_function=False, ast=None, source=None, parent=None, - symbol_attrs=None, rescope_symbols=False, incomplete=False + symbol_attrs=None, rescope_symbols=False, incomplete=False, parser_classes=None ): super().__init__(parent=parent) @@ -86,13 +88,13 @@ def __init__( name=name, args=args, docstring=docstring, spec=spec, body=body, contains=contains, prefix=prefix, bind=bind, result_name=result_name, is_function=is_function, ast=ast, source=source, - rescope_symbols=rescope_symbols, incomplete=incomplete + rescope_symbols=rescope_symbols, incomplete=incomplete, parser_classes=parser_classes ) def __initialize__( self, name, docstring=None, spec=None, contains=None, - ast=None, source=None, rescope_symbols=False, incomplete=False, - body=None, args=None, prefix=None, bind=None, result_name=None, is_function=False, + ast=None, source=None, rescope_symbols=False, incomplete=False, parser_classes=None, + body=None, args=None, prefix=None, bind=None, result_name=None, is_function=False ): # First, store additional Subroutine-specific properties self._dummies = as_tuple(a.lower() for a in as_tuple(args)) # Order of dummy arguments @@ -108,7 +110,8 @@ def __initialize__( super().__initialize__( name=name, docstring=docstring, spec=spec, contains=contains, - ast=ast, source=source, rescope_symbols=rescope_symbols, incomplete=incomplete + ast=ast, source=source, 
rescope_symbols=rescope_symbols, + incomplete=incomplete, parser_classes=parser_classes ) def __getstate__(self): diff --git a/tests/test_frontends.py b/tests/test_frontends.py index 186ca16ea..d566030b8 100644 --- a/tests/test_frontends.py +++ b/tests/test_frontends.py @@ -628,9 +628,22 @@ def test_regex_sourcefile_from_file_parser_classes(here): assert not sourcefile.modules assert FindNodes(RawSource).visit(sourcefile.ir) assert sourcefile._incomplete + assert sourcefile._parser_classes == RegexParserClass.TypeDefClass # Incremental addition of program unit objects sourcefile.make_complete(frontend=REGEX, parser_classes=RegexParserClass.ProgramUnitClass) + assert sourcefile._incomplete + assert sourcefile._parser_classes == RegexParserClass.ProgramUnitClass | RegexParserClass.TypeDefClass + # Note that the program unit objects don't include the TypeDefClass because it's lower in the hierarchy + # and was not matched previously + assert all( + module._parser_classes == RegexParserClass.ProgramUnitClass + for module in sourcefile.modules + ) + assert all( + routine._parser_classes == RegexParserClass.ProgramUnitClass + for routine in sourcefile.routines + ) assert {module.name.lower() for module in sourcefile.modules} == module_names assert {routine.name.lower() for routine in sourcefile.routines} == routine_names @@ -652,6 +665,21 @@ def test_regex_sourcefile_from_file_parser_classes(here): frontend=REGEX, parser_classes=RegexParserClass.ProgramUnitClass | RegexParserClass.ImportClass ) + assert sourcefile._parser_classes == ( + RegexParserClass.ProgramUnitClass | RegexParserClass.TypeDefClass | RegexParserClass.ImportClass + ) + # Note that the program unit objects don't include the TypeDefClass because it's lower in the hierarchy + # and was not matched previously + assert all( + module._parser_classes == ( + RegexParserClass.ProgramUnitClass | RegexParserClass.ImportClass + ) for module in sourcefile.modules + ) + assert all( + routine._parser_classes == ( + 
RegexParserClass.ProgramUnitClass | RegexParserClass.ImportClass + ) for routine in sourcefile.routines + ) assert {module.name.lower() for module in sourcefile.modules} == module_names assert {routine.name.lower() for routine in sourcefile.routines} == routine_names @@ -676,6 +704,15 @@ def test_regex_sourcefile_from_file_parser_classes(here): # Parse the rest sourcefile.make_complete(frontend=REGEX, parser_classes=RegexParserClass.AllClasses) + assert sourcefile._parser_classes == RegexParserClass.AllClasses + assert all( + module._parser_classes == RegexParserClass.AllClasses + for module in sourcefile.modules + ) + assert all( + routine._parser_classes == RegexParserClass.AllClasses + for routine in sourcefile.routines + ) assert {module.name.lower() for module in sourcefile.modules} == module_names assert {routine.name.lower() for routine in sourcefile.routines} == routine_names From 4405f13718c18e9e5c42e476e0766a76d1907209 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Fri, 12 May 2023 10:17:30 +0100 Subject: [PATCH 03/23] WIP: Towards SGraph --- loki/bulk/item.py | 59 ++++++++++++++++++++++++++++++++++++++++-- tests/test_batch.py | 62 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+), 2 deletions(-) create mode 100644 tests/test_batch.py diff --git a/loki/bulk/item.py b/loki/bulk/item.py index 53f7761a0..bf0179394 100644 --- a/loki/bulk/item.py +++ b/loki/bulk/item.py @@ -20,10 +20,15 @@ def cached_property(func): from loki.tools.util import CaseInsensitiveDict from loki.visitors import FindNodes from loki.ir import CallStatement, TypeDef, ProcedureDeclaration +from loki.module import Module +from loki.sourcefile import Sourcefile from loki.subroutine import Subroutine -__all__ = ['Item', 'SubroutineItem', 'ProcedureBindingItem', 'GlobalVarImportItem', 'GenericImportItem'] +__all__ = [ + 'Item', 'FileItem', 'ModuleItem', 'SubroutineItem', + 'ProcedureBindingItem', 'GlobalVarImportItem', 'GenericImportItem' +] class Item: 
@@ -89,8 +94,24 @@ class Item: Dict of item-specific config markers """ + @classmethod + def create_from_node(cls, node, source): + if isinstance(node, Sourcefile): + return FileItem(node.path.name.lower(), source) + if isinstance(node, Module): + return ModuleItem(node.name.lower(), source) + + if node.parent: + scope = node.parent.name + else: + scope = '' + if isinstance(node, Subroutine): + return SubroutineItem(f'{scope}#{node.name}'.lower(), source) + if isinstance(node, TypeDef): + return TypeDefItem(f'{scope}#{node.name}'.lower(), source) + def __init__(self, name, source, config=None): - assert '#' in name + # assert '#' in name or '.' in name self.name = name self.source = source self.config = config or {} @@ -115,6 +136,21 @@ def __eq__(self, other): def __hash__(self): return hash(self.name) + @property + def definitions(self): + return () + + def get_items(self, only=None): + items = tuple( + self.create_from_node(node, self.source) + for node in self.definitions + ) + if only: + items = tuple( + item for item in items if isinstance(item, only) + ) + return items + def clear_cached_property(self, property_name): """ Clear the cached value for a cached property @@ -477,6 +513,25 @@ def targets(self): return as_tuple(targets) +class FileItem(Item): + + @property + def definitions(self): + return self.source.definitions + + +class ModuleItem(Item): + + @property + def definitions(self): + module = self.source[self.name] + return module.subroutines + tuple(module.typedefs.values()) + + +class TypeDefItem(Item): + pass + + class SubroutineItem(Item): """ Implementation of :class:`Item` to represent a Fortran subroutine work item diff --git a/tests/test_batch.py b/tests/test_batch.py new file mode 100644 index 000000000..089a57b89 --- /dev/null +++ b/tests/test_batch.py @@ -0,0 +1,62 @@ +# (C) Copyright 2018- ECMWF. 
+# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + + +from pathlib import Path +import pytest + +from loki import ( + HAVE_FP, HAVE_OFP, REGEX, RegexParserClass, + FileItem, + Sourcefile +) + +pytestmark = pytest.mark.skipif(not HAVE_FP and not HAVE_OFP, reason='Fparser and OFP not available') + + +@pytest.fixture(scope='module', name='here') +def fixture_here(): + return Path(__file__).parent + + +def test_file_item(here): + proj = here/'sources/projBatch' + + def get_item(path, parser_classes): + filepath = proj/path + return FileItem( + filepath.name.lower(), + Sourcefile.from_file(filepath, frontend=REGEX, parser_classes=parser_classes) + ) + + # A file with simple module that contains a single subroutine + item = get_item('module/a_mod.F90', RegexParserClass.ProgramUnitClass) + assert item.name == 'a_mod.f90' + assert item.definitions == (item.source['a_mod'],) + items = item.get_items() + assert len(items) == 1 + assert items[0].name == 'a_mod' + assert items[0].definitions == (item.source['a'],) + + # A file with a simple module that contains a single typedef + item = get_item('module/t_mod.F90', RegexParserClass.ProgramUnitClass) + assert item.name == 't_mod.f90' + assert item.definitions == (item.source['t_mod'],) + + items = item.get_items() + assert len(items) == 1 + assert items[0].name == 't_mod' + assert items[0].definitions == () # No typedefs because not selected in parser classes + + item = get_item('module/t_mod.F90', RegexParserClass.ProgramUnitClass | RegexParserClass.TypeDefClass) + assert item.name == 't_mod.f90' + assert item.definitions == (item.source['t_mod'],) + + items = item.get_items() + assert len(items) == 1 + assert items[0].name == 
't_mod' + assert items[0].definitions == (item.source['t'],) From 62b0e5ce4a90a97332654254893e720b2a205287 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Wed, 2 Aug 2023 15:12:00 +0100 Subject: [PATCH 04/23] Expose defined IR nodes in Module and Subroutine --- loki/program_unit.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/loki/program_unit.py b/loki/program_unit.py index dada17134..801c4b265 100644 --- a/loki/program_unit.py +++ b/loki/program_unit.py @@ -471,6 +471,15 @@ def enum_symbols(self): """ return as_tuple(flatten(enum.symbols for enum in FindNodes(ir.Enumeration).visit(self.spec or ()))) + @property + def definitions(self): + """ + The list of IR nodes defined by this program unit. + + Returns an empty tuple by default and can be overwritten by derived nodes. + """ + return () + @property def symbols(self): """ From fd94284d1b21a4cfd271cbd079191ced4eb9a99d Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Tue, 16 May 2023 11:44:31 +0100 Subject: [PATCH 05/23] Incremental parsing stubs via Item.definitions --- loki/bulk/item.py | 60 +++++++++++++++++++++++++++++++++++++------- loki/program_unit.py | 2 ++ tests/test_batch.py | 32 +++++++++++++++++++++-- 3 files changed, 83 insertions(+), 11 deletions(-) diff --git a/loki/bulk/item.py b/loki/bulk/item.py index bf0179394..885a151ed 100644 --- a/loki/bulk/item.py +++ b/loki/bulk/item.py @@ -15,6 +15,7 @@ def cached_property(func): return func +from loki.frontend import REGEX, RegexParserClass from loki.logging import warning from loki.tools import as_tuple from loki.tools.util import CaseInsensitiveDict @@ -94,8 +95,10 @@ class Item: Dict of item-specific config markers """ + _parser_classes = None + @classmethod - def create_from_node(cls, node, source): + def create_from_ir(cls, node, source): if isinstance(node, Sourcefile): return FileItem(node.path.name.lower(), source) if isinstance(node, Module): @@ -138,11 +141,19 @@ def __hash__(self): @property def definitions(self): - 
return () + self.concretize() + return self.ir.definitions + + @property + def ir(self): + return self.source[self.local_name] + + def concretize(self): + self.ir.make_complete(frontend=REGEX, parser_classes=self._parser_classes) def get_items(self, only=None): items = tuple( - self.create_from_node(node, self.source) + self.create_from_ir(node, self.source) for node in self.definitions ) if only: @@ -515,21 +526,42 @@ def targets(self): class FileItem(Item): + _parser_classes = ( + RegexParserClass.ProgramUnitClass + ) + @property - def definitions(self): - return self.source.definitions + def ir(self): + return self.source + + @property + def local_name(self): + return self.name class ModuleItem(Item): + _parser_classes = ( + RegexParserClass.ProgramUnitClass | RegexParserClass.InterfaceClass | + RegexParserClass.ImportClass | RegexParserClass.TypeDefClass | + RegexParserClass.DeclarationClass + ) + @property - def definitions(self): - module = self.source[self.name] - return module.subroutines + tuple(module.typedefs.values()) + def local_name(self): + return self.name class TypeDefItem(Item): - pass + + _parser_classes = () + + def concretize(self): + pass + + @property + def definitions(self): + return () class SubroutineItem(Item): @@ -537,10 +569,20 @@ class SubroutineItem(Item): Implementation of :class:`Item` to represent a Fortran subroutine work item """ + _parser_classes = ( + RegexParserClass.ProgramUnitClass | RegexParserClass.InterfaceClass | + RegexParserClass.ImportClass | RegexParserClass.TypeDefClass | + RegexParserClass.DeclarationClass | RegexParserClass.CallClass + ) + def __init__(self, name, source, config=None): assert '%' not in name super().__init__(name, source, config) + @property + def definitions(self): + return () + @cached_property def routine(self): """ diff --git a/loki/program_unit.py b/loki/program_unit.py index 801c4b265..9f7ffe331 100644 --- a/loki/program_unit.py +++ b/loki/program_unit.py @@ -244,6 +244,8 @@ def 
make_complete(self, **frontend_args): xmods = frontend_args.get('xmods') parser_classes = frontend_args.get('parser_classes', RegexParserClass.AllClasses) if frontend == Frontend.REGEX and self._parser_classes: + if self._parser_classes == parser_classes: + return parser_classes = parser_classes | self._parser_classes # If this object does not have a parent, we create a temporary parent scope diff --git a/tests/test_batch.py b/tests/test_batch.py index 089a57b89..67c634fef 100644 --- a/tests/test_batch.py +++ b/tests/test_batch.py @@ -11,7 +11,7 @@ from loki import ( HAVE_FP, HAVE_OFP, REGEX, RegexParserClass, - FileItem, + FileItem, ModuleItem, SubroutineItem, Sourcefile ) @@ -37,6 +37,7 @@ def get_item(path, parser_classes): item = get_item('module/a_mod.F90', RegexParserClass.ProgramUnitClass) assert item.name == 'a_mod.f90' assert item.definitions == (item.source['a_mod'],) + assert item.ir is item.source items = item.get_items() assert len(items) == 1 assert items[0].name == 'a_mod' @@ -50,8 +51,13 @@ def get_item(path, parser_classes): items = item.get_items() assert len(items) == 1 assert items[0].name == 't_mod' - assert items[0].definitions == () # No typedefs because not selected in parser classes + assert items[0].ir is item.source['t_mod'] + # No typedefs because not selected in parser classes + assert not items[0].ir.typedefs + # Calling definitions automatically further completes the source + assert items[0].definitions == (items[0].ir.typedefs['t'],) + # The same file but with typedefs parsed from the get-go item = get_item('module/t_mod.F90', RegexParserClass.ProgramUnitClass | RegexParserClass.TypeDefClass) assert item.name == 't_mod.f90' assert item.definitions == (item.source['t_mod'],) @@ -59,4 +65,26 @@ def get_item(path, parser_classes): items = item.get_items() assert len(items) == 1 assert items[0].name == 't_mod' + assert len(items[0].ir.typedefs) == 1 assert items[0].definitions == (item.source['t'],) + + # Filter items when calling 
get_items() + assert not item.get_items(only=SubroutineItem) + items = item.get_items(only=ModuleItem) + assert len(items) == 1 + assert items[0].ir == item.source['t_mod'] + + +def test_module_item(here): + proj = here/'sources/projBatch' + + def get_item(path, name, parser_classes): + filepath = proj/path + source = Sourcefile.from_file(filepath, frontend=REGEX, parser_classes=parser_classes) + return ModuleItem(name, source=source) + + # A file with simple module that contains a single subroutine + item = get_item('module/a_mod.F90', 'a_mod', RegexParserClass.ProgramUnitClass) + assert item.name == 'a_mod' + assert item.ir is item.source['a_mod'] + assert item.definitions == (item.source['a'],) From 315fd7babc7038f2eefe9d6664e2b598d1171ef5 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Tue, 16 May 2023 17:36:59 +0100 Subject: [PATCH 06/23] Initial sources for batch processing tests --- tests/sources/projBatch/headers/header_mod.F90 | 4 ++++ tests/sources/projBatch/include/comp2.intfb.h | 9 +++++++++ tests/sources/projBatch/module/a_mod.F90 | 8 ++++++++ tests/sources/projBatch/module/b_mod.F90 | 8 ++++++++ tests/sources/projBatch/module/t_mod.F90 | 8 ++++++++ tests/sources/projBatch/module/tt_mod.F90 | 8 ++++++++ tests/sources/projBatch/source/comp1.F90 | 9 +++++++++ tests/sources/projBatch/source/comp2.f90 | 12 ++++++++++++ 8 files changed, 66 insertions(+) create mode 100644 tests/sources/projBatch/headers/header_mod.F90 create mode 100644 tests/sources/projBatch/include/comp2.intfb.h create mode 100644 tests/sources/projBatch/module/a_mod.F90 create mode 100644 tests/sources/projBatch/module/b_mod.F90 create mode 100644 tests/sources/projBatch/module/t_mod.F90 create mode 100644 tests/sources/projBatch/module/tt_mod.F90 create mode 100644 tests/sources/projBatch/source/comp1.F90 create mode 100644 tests/sources/projBatch/source/comp2.f90 diff --git a/tests/sources/projBatch/headers/header_mod.F90 b/tests/sources/projBatch/headers/header_mod.F90 new file 
mode 100644 index 000000000..67171383b --- /dev/null +++ b/tests/sources/projBatch/headers/header_mod.F90 @@ -0,0 +1,4 @@ +module header_mod + implicit none + integer, parameter :: k = 8 +end module header_mod diff --git a/tests/sources/projBatch/include/comp2.intfb.h b/tests/sources/projBatch/include/comp2.intfb.h new file mode 100644 index 000000000..17ad39d42 --- /dev/null +++ b/tests/sources/projBatch/include/comp2.intfb.h @@ -0,0 +1,9 @@ +interface +subroutine comp2 (arg, val) + use t_mod, only: t + use header_mod, only: k + implicit none + type(t), intent(inout) :: arg + real(kind=k), intent(inout) :: val(:) +end subroutine comp2 +end interface diff --git a/tests/sources/projBatch/module/a_mod.F90 b/tests/sources/projBatch/module/a_mod.F90 new file mode 100644 index 000000000..991d28c1f --- /dev/null +++ b/tests/sources/projBatch/module/a_mod.F90 @@ -0,0 +1,8 @@ +module a_mod + implicit none +contains + subroutine a(arg) + use header_mod, only: k + real(kind=k), intent(inout) :: arg(:) + end subroutine a +end module a_mod diff --git a/tests/sources/projBatch/module/b_mod.F90 b/tests/sources/projBatch/module/b_mod.F90 new file mode 100644 index 000000000..e30d91f09 --- /dev/null +++ b/tests/sources/projBatch/module/b_mod.F90 @@ -0,0 +1,8 @@ +module b_mod + implicit none +contains + subroutine b(arg) + use header_mod, only: k + real(kind=k), intent(inout) :: arg(:) + end subroutine b +end module b_mod diff --git a/tests/sources/projBatch/module/t_mod.F90 b/tests/sources/projBatch/module/t_mod.F90 new file mode 100644 index 000000000..00610fd5b --- /dev/null +++ b/tests/sources/projBatch/module/t_mod.F90 @@ -0,0 +1,8 @@ +module t_mod + use tt_mod, only: tt + implicit none + + type t + type(tt) :: yay + end type t +end module t_mod diff --git a/tests/sources/projBatch/module/tt_mod.F90 b/tests/sources/projBatch/module/tt_mod.F90 new file mode 100644 index 000000000..80c1eec1b --- /dev/null +++ b/tests/sources/projBatch/module/tt_mod.F90 @@ -0,0 +1,8 @@ +module tt_mod + 
use header_mod, only: k + implicit none + + type tt + real(kind=k), allocatable :: indirection(:) + end type tt +end module tt_mod diff --git a/tests/sources/projBatch/source/comp1.F90 b/tests/sources/projBatch/source/comp1.F90 new file mode 100644 index 000000000..4aead708e --- /dev/null +++ b/tests/sources/projBatch/source/comp1.F90 @@ -0,0 +1,9 @@ +subroutine comp1 (arg, val) + use t_mod, only: t + use header_mod, only: k + implicit none + type(t), intent(inout) :: arg + real(kind=k), intent(inout) :: val(:) +#include "comp2.intfb.h" + call comp2(arg, val) +end subroutine comp1 diff --git a/tests/sources/projBatch/source/comp2.f90 b/tests/sources/projBatch/source/comp2.f90 new file mode 100644 index 000000000..fac6375ec --- /dev/null +++ b/tests/sources/projBatch/source/comp2.f90 @@ -0,0 +1,12 @@ +subroutine comp2 (arg, val) + use t_mod, only: t + use header_mod, only: k + use a_mod, only: a + use b_mod, only: b + implicit none + type(t), intent(inout) :: arg + real(kind=k), intent(inout) :: val(:) + + call a(t%yay%indirection) + call b(val) +end subroutine comp2 From 107d39f4af648a7018d71d5b98146fccfa8532be Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Tue, 16 May 2023 18:24:21 +0100 Subject: [PATCH 07/23] Link items to parser classes and name dependencies/definitions --- loki/bulk/item.py | 59 ++++++++++++++++++++++++++++----------------- tests/test_batch.py | 32 +++++++++++++++++++++++- 2 files changed, 68 insertions(+), 23 deletions(-) diff --git a/loki/bulk/item.py b/loki/bulk/item.py index 885a151ed..5ccf210b3 100644 --- a/loki/bulk/item.py +++ b/loki/bulk/item.py @@ -14,6 +14,8 @@ except ImportError: def cached_property(func): return func +from functools import reduce +import sys from loki.frontend import REGEX, RegexParserClass from loki.logging import warning @@ -27,7 +29,7 @@ def cached_property(func): __all__ = [ - 'Item', 'FileItem', 'ModuleItem', 'SubroutineItem', + 'Item', 'FileItem', 'ModuleItem', 'SubroutineItem', 'TypeDefItem', 
'ProcedureBindingItem', 'GlobalVarImportItem', 'GenericImportItem' ] @@ -95,7 +97,9 @@ class Item: Dict of item-specific config markers """ - _parser_classes = None + _parser_class = None + _defines_items = () + _depends_items = () @classmethod def create_from_ir(cls, node, source): @@ -141,15 +145,38 @@ def __hash__(self): @property def definitions(self): - self.concretize() + self.concretize_definitions() return self.ir.definitions + @property + def dependencies(self): + self.concretize_dependencies() + return self._dependencies + + @property + def _dependencies(self): + return () + @property def ir(self): return self.source[self.local_name] - def concretize(self): - self.ir.make_complete(frontend=REGEX, parser_classes=self._parser_classes) + def _parser_classes_from_item_type_names(self, item_type_names): + item_types = [getattr(sys.modules[__name__], name) for name in item_type_names] + parser_classes = [p for item_type in item_types if (p := item_type._parser_class) is not None] + if parser_classes: + return reduce(lambda x, y: x | y, parser_classes) + return None + + def concretize_definitions(self): + parser_classes = self._parser_classes_from_item_type_names(self._defines_items) + if parser_classes: + self.ir.make_complete(frontend=REGEX, parser_classes=parser_classes) + + def concretize_dependencies(self): + parser_classes = self._parser_classes_from_item_type_names(self._depends_items) + if parser_classes: + self.ir.make_complete(frontend=REGEX, parser_classes=parser_classes) def get_items(self, only=None): items = tuple( @@ -526,9 +553,7 @@ def targets(self): class FileItem(Item): - _parser_classes = ( - RegexParserClass.ProgramUnitClass - ) + _parser_class = None @property def ir(self): @@ -541,11 +566,8 @@ def local_name(self): class ModuleItem(Item): - _parser_classes = ( - RegexParserClass.ProgramUnitClass | RegexParserClass.InterfaceClass | - RegexParserClass.ImportClass | RegexParserClass.TypeDefClass | - RegexParserClass.DeclarationClass - ) + 
_parser_class = RegexParserClass.ProgramUnitClass + _defines_items = ('SubroutineItem', 'TypeDefItem') @property def local_name(self): @@ -554,10 +576,7 @@ def local_name(self): class TypeDefItem(Item): - _parser_classes = () - - def concretize(self): - pass + _parser_class = RegexParserClass.TypeDefClass @property def definitions(self): @@ -569,11 +588,7 @@ class SubroutineItem(Item): Implementation of :class:`Item` to represent a Fortran subroutine work item """ - _parser_classes = ( - RegexParserClass.ProgramUnitClass | RegexParserClass.InterfaceClass | - RegexParserClass.ImportClass | RegexParserClass.TypeDefClass | - RegexParserClass.DeclarationClass | RegexParserClass.CallClass - ) + _parser_class = RegexParserClass.ProgramUnitClass def __init__(self, name, source, config=None): assert '%' not in name diff --git a/tests/test_batch.py b/tests/test_batch.py index 67c634fef..636d34b5c 100644 --- a/tests/test_batch.py +++ b/tests/test_batch.py @@ -11,7 +11,7 @@ from loki import ( HAVE_FP, HAVE_OFP, REGEX, RegexParserClass, - FileItem, ModuleItem, SubroutineItem, + FileItem, ModuleItem, SubroutineItem, TypeDefItem, Sourcefile ) @@ -88,3 +88,33 @@ def get_item(path, name, parser_classes): assert item.name == 'a_mod' assert item.ir is item.source['a_mod'] assert item.definitions == (item.source['a'],) + + +def test_subroutine_item(here): + proj = here/'sources/projBatch' + + def get_item(path, name, parser_classes): + filepath = proj/path + source = Sourcefile.from_file(filepath, frontend=REGEX, parser_classes=parser_classes) + return SubroutineItem(name, source=source) + + # A file with a single subroutine definition + item = get_item('source/comp1.F90', '#comp1', RegexParserClass.ProgramUnitClass) + assert item.name == '#comp1' + assert item.ir is item.source['comp1'] + assert item.definitions is () + + +def test_typedef_item(here): + proj = here/'sources/projBatch' + + def get_item(path, name, parser_classes): + filepath = proj/path + source = 
Sourcefile.from_file(filepath, frontend=REGEX, parser_classes=parser_classes) + return TypeDefItem(name, source=source) + + # A file with a single type definition + item = get_item('module/t_mod.F90', 't_mod#t', RegexParserClass.ProgramUnitClass | RegexParserClass.TypeDefClass) + assert item.name == 't_mod#t' + assert item.ir is item.source['t'] + assert item.definitions is () From d2bad7538f59f1bee4ac60aeb1258a1fe25778da Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Wed, 2 Aug 2023 15:12:41 +0100 Subject: [PATCH 08/23] Expose global variables declared in a module --- loki/module.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/loki/module.py b/loki/module.py index 96d597bc8..4bde828ff 100644 --- a/loki/module.py +++ b/loki/module.py @@ -17,8 +17,9 @@ from loki.program_unit import ProgramUnit from loki.scope import Scope from loki.subroutine import Subroutine -from loki.tools import as_tuple +from loki.tools import as_tuple, flatten from loki.types import ModuleType, SymbolAttributes +from loki.visitors import FindNodes __all__ = ['Module'] @@ -293,6 +294,12 @@ def __setstate__(self, s): # Ensure that we are attaching all symbols to the newly create ``self``. self.rescope_symbols() + @property + def variables(self): + return tuple(flatten( + decl.symbols for decl in FindNodes(VariableDeclaration).visit(self.spec or ()) + )) + @property def definitions(self): """ From d0b2145b8763c9732dca910f06d532790d599991 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Thu, 18 May 2023 14:42:15 +0100 Subject: [PATCH 09/23] Introduce an empty RegexParserClass --- loki/frontend/regex.py | 1 + 1 file changed, 1 insertion(+) diff --git a/loki/frontend/regex.py b/loki/frontend/regex.py index c7220cfd6..e0dc07564 100644 --- a/loki/frontend/regex.py +++ b/loki/frontend/regex.py @@ -42,6 +42,7 @@ class RegexParserClass(Flag): pattern matching can be switched on and off for some pattern classes, and thus the overall parse time reduced. 
""" + EmptyClass = 0 ProgramUnitClass = auto() InterfaceClass = auto() ImportClass = auto() From 65facf8c68b874e29287d134b1b18738db8656a0 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Thu, 18 May 2023 14:58:07 +0100 Subject: [PATCH 10/23] First draft of on-demand regex logic and graph build --- loki/bulk/item.py | 280 ++++++++++++++---- .../sources/projBatch/headers/header_mod.F90 | 2 +- tests/sources/projBatch/module/b_mod.F90 | 2 +- tests/sources/projBatch/module/t_mod.F90 | 16 + tests/sources/projBatch/module/tt_mod.F90 | 6 + tests/sources/projBatch/source/comp1.F90 | 4 +- tests/sources/projBatch/source/comp2.f90 | 1 + tests/test_batch.py | 233 ++++++++++++++- 8 files changed, 466 insertions(+), 78 deletions(-) diff --git a/loki/bulk/item.py b/loki/bulk/item.py index 5ccf210b3..de895f462 100644 --- a/loki/bulk/item.py +++ b/loki/bulk/item.py @@ -6,31 +6,23 @@ # nor does it submit to any jurisdiction. from abc import abstractmethod -try: - from functools import cached_property -except ImportError: - try: - from cached_property import cached_property - except ImportError: - def cached_property(func): - return func -from functools import reduce +from functools import cached_property, reduce import sys from loki.frontend import REGEX, RegexParserClass +from loki.expression import TypedSymbol, MetaSymbol, ProcedureSymbol +from loki.ir import Import, CallStatement, TypeDef, ProcedureDeclaration from loki.logging import warning -from loki.tools import as_tuple -from loki.tools.util import CaseInsensitiveDict -from loki.visitors import FindNodes -from loki.ir import CallStatement, TypeDef, ProcedureDeclaration from loki.module import Module -from loki.sourcefile import Sourcefile from loki.subroutine import Subroutine +from loki.tools import as_tuple, flatten, CaseInsensitiveDict +from loki.visitors import FindNodes __all__ = [ - 'Item', 'FileItem', 'ModuleItem', 'SubroutineItem', 'TypeDefItem', - 'ProcedureBindingItem', 'GlobalVarImportItem', 
'GenericImportItem' + 'Item', 'FileItem', 'ModuleItem', 'ProcedureItem', 'SubroutineItem', 'TypeDefItem', + 'InterfaceItem', 'ProcedureBindingItem', 'GlobalVariableItem', + 'GlobalVarImportItem', 'GenericImportItem' ] @@ -99,23 +91,7 @@ class Item: _parser_class = None _defines_items = () - _depends_items = () - - @classmethod - def create_from_ir(cls, node, source): - if isinstance(node, Sourcefile): - return FileItem(node.path.name.lower(), source) - if isinstance(node, Module): - return ModuleItem(node.name.lower(), source) - - if node.parent: - scope = node.parent.name - else: - scope = '' - if isinstance(node, Subroutine): - return SubroutineItem(f'{scope}#{node.name}'.lower(), source) - if isinstance(node, TypeDef): - return TypeDefItem(f'{scope}#{node.name}'.lower(), source) + _depends_class = None def __init__(self, name, source, config=None): # assert '#' in name or '.' in name @@ -125,7 +101,7 @@ def __init__(self, name, source, config=None): self.trafo_data = {} def __repr__(self): - return f'loki.bulk.Item<{self.name}>' + return f'loki.bulk.{self.__class__.__name__}<{self.name}>' def __eq__(self, other): """ @@ -145,8 +121,7 @@ def __hash__(self): @property def definitions(self): - self.concretize_definitions() - return self.ir.definitions + return () @property def dependencies(self): @@ -170,23 +145,141 @@ def _parser_classes_from_item_type_names(self, item_type_names): def concretize_definitions(self): parser_classes = self._parser_classes_from_item_type_names(self._defines_items) - if parser_classes: + if parser_classes and hasattr(self.ir, 'make_complete'): self.ir.make_complete(frontend=REGEX, parser_classes=parser_classes) def concretize_dependencies(self): - parser_classes = self._parser_classes_from_item_type_names(self._depends_items) - if parser_classes: - self.ir.make_complete(frontend=REGEX, parser_classes=parser_classes) + if self._depends_class and hasattr(self.ir, 'make_complete'): + ir = self.ir + while ir.parent: + ir = ir.parent + 
ir.make_complete(frontend=REGEX, parser_classes=self._depends_class) + + def create_from_ir(self, node, item_cache): + if isinstance(node, Module): + item_name = node.name.lower() + items = as_tuple(item_cache.get(item_name)) + if not items: + assert node in self.source.modules + items = as_tuple(ModuleItem(item_name, source=self.source)) + + elif isinstance(node, Subroutine): + item_name = f'{getattr(node.parent, "name", "")}#{node.name}'.lower() + items = as_tuple(item_cache.get(item_name)) + if not items: + assert node in self.source.all_subroutines + items = as_tuple(ProcedureItem(item_name, source=self.source)) + + elif isinstance(node, TypeDef): + item_name = f'{node.parent.name}#{node.name}'.lower() + items = as_tuple(item_cache.get(item_name)) + if not items: + assert node.parent in self.source.modules + items = as_tuple(TypeDefItem(item_name, source=self.source)) + + elif isinstance(node, Import): + # If we have a fully-qualified import (which we hopefully have), + # we create a dependency for every imported symbol, otherwise we + # depend only on the imported module + module_item = item_cache[node.module.lower()] + if node.symbols: + module_definitions = { + item.local_name: item for item in module_item.create_definition_items(item_cache=item_cache) + } + items = tuple(module_definitions[str(smbl).lower()] for smbl in node.symbols) + else: + items = as_tuple(module_item) + + elif isinstance(node, CallStatement): + procedure_name = str(node.name) + if '%' in procedure_name: + # This is a typebound procedure call, we are only resolving + # to the type member by mapping the local name to the type name + type_name = node.name.parents[0].type.dtype.name.lower() + # Find the module where the type is defined + if type_name in node.name.scope.imported_symbols: + for imprt in node.name.scope.imports: + if type_name in imprt.symbols: + module_name = imprt.module.lower() + break + else: + # TODO: Resolve call to type-bound procedure + raise NotImplementedError() + 
item_name = f'{module_name}#{type_name}%{"%".join(node.name.name_parts[1:])}'.lower() + items = as_tuple(item_cache.get(item_name)) + if not items: + module_item = item_cache[module_name] + items = as_tuple(ProcedureBindingItem(item_name, source=module_item.source)) + elif procedure_name in self.ir.imported_symbols: + # This is a call to a module procedure which has been imported via + # a fully qualified import + for imprt in self.ir.imports: + if procedure_name in imprt.symbols: + # TODO: Handle renaming + module_name = imprt.module.lower() + break + item_name = f'{module_name}#{procedure_name}'.lower() + items = as_tuple(item_cache.get(item_name)) + if not items: + module_item = item_cache[module_name] + items = as_tuple(ProcedureBindingItem(item_name, source=module_item.source)) + elif procedure_name in (intf_map := self.ir.interface_symbols): + # TODO: Handle declaration via interface + raise NotImplementedError() + else: + item_name = f'#{procedure_name}'.lower() + items = (item_cache[item_name],) + + elif isinstance(node, ProcedureSymbol): + # This is a procedure binding + assert '%' in node.name + type_name = node.parent.type.dtype.name + proc_name = '%'.join(node.name_parts[1:]) + module = node.scope.parent + if type_name in module.typedefs: + module_name = module.name.lower() + else: + for imprt in module.imports: + if type_name in imprt.symbols: + module_name = imprt.module.lower() + break + item_name = f'{module_name}#{type_name}%{proc_name}'.lower() + items = as_tuple(item_cache.get(item_name)) + if not items: + module_item = item_cache[module_name] + items = as_tuple(ProcedureBindingItem(item_name, source=module_item.source)) + + elif isinstance(node, (TypedSymbol, MetaSymbol)): + # This is a global variable + item_name = f'{node.scope.name}#{node.name}'.lower() + items = as_tuple(item_cache.get(item_name)) + if not items: + module_item = item_cache[node.scope.name.lower()] + items = as_tuple(GlobalVariableItem(item_name, source=module_item.source)) + 
else: + raise ValueError(f'{node} has an unsupported node type {type(node)}') + + # Insert new items into the cache + item_cache.update((item.name, item) for item in items if item.name not in item_cache) + + return items + + def create_definition_items(self, item_cache, only=None): + items = tuple(flatten(self.create_from_ir(node, item_cache) for node in self.definitions)) + if only: + items = tuple(item for item in items if isinstance(item, only)) + return items + + def create_dependency_items(self, item_cache, only=None): + if not (dependencies := self.dependencies): + return () + + items = () + for node in dependencies: + items += self.create_from_ir(node, item_cache) - def get_items(self, only=None): - items = tuple( - self.create_from_ir(node, self.source) - for node in self.definitions - ) if only: - items = tuple( - item for item in items if isinstance(item, only) - ) + items = tuple(item for item in items if isinstance(item, only)) return items def clear_cached_property(self, property_name): @@ -554,6 +647,12 @@ def targets(self): class FileItem(Item): _parser_class = None + _defines_items = ('ModuleItem', 'SubroutineItem') + + @property + def definitions(self): + self.concretize_definitions() + return self.ir.definitions @property def ir(self): @@ -566,21 +665,58 @@ def local_name(self): class ModuleItem(Item): - _parser_class = RegexParserClass.ProgramUnitClass - _defines_items = ('SubroutineItem', 'TypeDefItem') + _parser_class = RegexParserClass.ProgramUnitClass #| RegexParserClass.ImportClass + _defines_items = ('ProcedureItem', 'TypeDefItem', 'GlobalVariableItem') + _depends_class = RegexParserClass.ImportClass + + @property + def definitions(self): + self.concretize_definitions() + return self.ir.definitions + + @property + def _dependencies(self): + return as_tuple(self.ir.imports) @property def local_name(self): return self.name +class ProcedureItem(Item): + + _parser_class = RegexParserClass.ProgramUnitClass + _depends_class = ( + 
RegexParserClass.ImportClass | RegexParserClass.InterfaceClass | + RegexParserClass.DeclarationClass | RegexParserClass.CallClass + ) + + @property + def _dependencies(self): + calls = tuple(FindNodes(CallStatement).visit(self.ir.ir)) + imports = self.ir.imports + if self.ir.parent: + imports += self.ir.parent.imports + return as_tuple(imports) + calls + + class TypeDefItem(Item): - _parser_class = RegexParserClass.TypeDefClass + _parser_class = RegexParserClass.TypeDefClass #| RegexParserClass.DeclarationClass @property - def definitions(self): - return () + def _dependencies(self): + return as_tuple(self.ir.parent.imports) + + +class InterfaceItem(Item): + + _parser_class = RegexParserClass.InterfaceClass + + +class GlobalVariableItem(Item): + + _parser_class = RegexParserClass.DeclarationClass class SubroutineItem(Item): @@ -588,16 +724,10 @@ class SubroutineItem(Item): Implementation of :class:`Item` to represent a Fortran subroutine work item """ - _parser_class = RegexParserClass.ProgramUnitClass - def __init__(self, name, source, config=None): assert '%' not in name super().__init__(name, source, config) - @property - def definitions(self): - return () - @cached_property def routine(self): """ @@ -701,6 +831,38 @@ class ProcedureBindingItem(Item): procedures to their implementation in a Fortran routine. 
""" + _parser_class = RegexParserClass.CallClass + # _depends_items = ('ProcedureItem',) + + @property + def ir(self): + name_parts = self.local_name.split('%') + typedef = self.source[name_parts[0]] + for decl in typedef.declarations: + if name_parts[1] in decl.symbols: + return decl + raise RuntimeError(f'Declaration for {self.name} not found') + + @property + def symbol(self): + local_name = self.local_name.split('%')[1] + decl = self.ir + return decl.symbols[decl.symbols.index(local_name)] + + @property + def _dependencies(self): + symbol = self.symbol + name_parts = self.local_name.split('%') + if len(name_parts) == 2: + # TODO: generic bindings + if symbol.type.initial: + return as_tuple(symbol.type.initial.type.dtype.procedure) + return as_tuple(self.source[symbol.name]) + + # This is a typebound procedure on a member + proc_name = f'{symbol.name}%{"%".join(name_parts[2:])}' + return as_tuple(ProcedureSymbol(name=proc_name, parent=symbol, scope=symbol.scope)) + def __init__(self, name, source, config=None): assert '%' in name super().__init__(name, source, config) diff --git a/tests/sources/projBatch/headers/header_mod.F90 b/tests/sources/projBatch/headers/header_mod.F90 index 67171383b..20c17fddc 100644 --- a/tests/sources/projBatch/headers/header_mod.F90 +++ b/tests/sources/projBatch/headers/header_mod.F90 @@ -1,4 +1,4 @@ module header_mod implicit none integer, parameter :: k = 8 -end header_mod +end module header_mod diff --git a/tests/sources/projBatch/module/b_mod.F90 b/tests/sources/projBatch/module/b_mod.F90 index e30d91f09..6b57fa9b2 100644 --- a/tests/sources/projBatch/module/b_mod.F90 +++ b/tests/sources/projBatch/module/b_mod.F90 @@ -1,8 +1,8 @@ module b_mod + use header_mod, only: k implicit none contains subroutine b(arg) - use header_mod, only: k real(kind=k), intent(inout) :: arg(:) end subroutine b end module b_mod diff --git a/tests/sources/projBatch/module/t_mod.F90 b/tests/sources/projBatch/module/t_mod.F90 index 00610fd5b..358ab2709 
100644 --- a/tests/sources/projBatch/module/t_mod.F90 +++ b/tests/sources/projBatch/module/t_mod.F90 @@ -2,7 +2,23 @@ module t_mod use tt_mod, only: tt implicit none + type t1 + contains + procedure :: way => my_way + end type t1 + type t type(tt) :: yay + type(t1) :: no + contains + procedure :: proc => t_proc end type t +contains + subroutine t_proc(this) + class(t), intent(inout) :: this + end subroutine t_proc + + subroutine my_way(this) + class(t1), intent(inout) :: this + end subroutine my_way end module t_mod diff --git a/tests/sources/projBatch/module/tt_mod.F90 b/tests/sources/projBatch/module/tt_mod.F90 index 80c1eec1b..211ccabdd 100644 --- a/tests/sources/projBatch/module/tt_mod.F90 +++ b/tests/sources/projBatch/module/tt_mod.F90 @@ -4,5 +4,11 @@ module tt_mod type tt real(kind=k), allocatable :: indirection(:) + contains + procedure :: proc => tt_proc end type tt +contains + subroutine tt_proc(this) + class(tt), intent(inout) :: this + end subroutine tt_proc end module tt_mod diff --git a/tests/sources/projBatch/source/comp1.F90 b/tests/sources/projBatch/source/comp1.F90 index 4aead708e..ae01569c2 100644 --- a/tests/sources/projBatch/source/comp1.F90 +++ b/tests/sources/projBatch/source/comp1.F90 @@ -1,9 +1,11 @@ subroutine comp1 (arg, val) use t_mod, only: t - use header_mod, only: k + use header_mod implicit none type(t), intent(inout) :: arg real(kind=k), intent(inout) :: val(:) #include "comp2.intfb.h" + call arg%proc() call comp2(arg, val) + call arg%no%way() end subroutine comp1 diff --git a/tests/sources/projBatch/source/comp2.f90 b/tests/sources/projBatch/source/comp2.f90 index fac6375ec..6b4439682 100644 --- a/tests/sources/projBatch/source/comp2.f90 +++ b/tests/sources/projBatch/source/comp2.f90 @@ -9,4 +9,5 @@ subroutine comp2 (arg, val) call a(t%yay%indirection) call b(val) + call arg%yay%proc() end subroutine comp2 diff --git a/tests/test_batch.py b/tests/test_batch.py index 636d34b5c..ea7b5d5a9 100644 --- a/tests/test_batch.py +++ 
b/tests/test_batch.py @@ -6,13 +6,15 @@ # nor does it submit to any jurisdiction. +from collections import deque from pathlib import Path +import networkx as nx import pytest from loki import ( - HAVE_FP, HAVE_OFP, REGEX, RegexParserClass, - FileItem, ModuleItem, SubroutineItem, TypeDefItem, - Sourcefile + HAVE_FP, HAVE_OFP, REGEX, RegexParserClass, as_tuple, + FileItem, ModuleItem, ProcedureItem, TypeDefItem, ProcedureBindingItem, GlobalVariableItem, + Sourcefile, Section, RawSource, Import, CallStatement ) pytestmark = pytest.mark.skipif(not HAVE_FP and not HAVE_OFP, reason='Fparser and OFP not available') @@ -34,11 +36,28 @@ def get_item(path, parser_classes): ) # A file with simple module that contains a single subroutine + item = get_item('module/a_mod.F90', RegexParserClass.EmptyClass) + assert item.name == 'a_mod.f90' + assert item.ir is item.source + # The file is not parsed at all + assert not item.source.definitions + assert isinstance(item.source.ir, Section) + assert len(item.source.ir.body) == 1 + assert isinstance(item.source.ir.body[0], RawSource) + + # Querying definitions triggers a round of parsing + assert item.definitions == (item.source['a_mod'],) + assert len(item.source.definitions) == 1 + items = item.create_definition_items(item_cache={}) + assert len(items) == 1 + assert items[0].name == 'a_mod' + assert items[0].definitions == (item.source['a'],) + item = get_item('module/a_mod.F90', RegexParserClass.ProgramUnitClass) assert item.name == 'a_mod.f90' assert item.definitions == (item.source['a_mod'],) assert item.ir is item.source - items = item.get_items() + items = item.create_definition_items(item_cache={}) assert len(items) == 1 assert items[0].name == 'a_mod' assert items[0].definitions == (item.source['a'],) @@ -48,30 +67,44 @@ def get_item(path, parser_classes): assert item.name == 't_mod.f90' assert item.definitions == (item.source['t_mod'],) - items = item.get_items() + items = item.create_definition_items(item_cache={}) assert 
len(items) == 1 assert items[0].name == 't_mod' assert items[0].ir is item.source['t_mod'] # No typedefs because not selected in parser classes assert not items[0].ir.typedefs # Calling definitions automatically further completes the source - assert items[0].definitions == (items[0].ir.typedefs['t'],) + assert items[0].definitions == ( + items[0].ir['t_proc'], + items[0].ir['my_way'], + items[0].ir.typedefs['t1'], + items[0].ir.typedefs['t'] + ) + + # Files don't have dependencies (direct dependencies, anyway) + assert item.dependencies is () # The same file but with typedefs parsed from the get-go item = get_item('module/t_mod.F90', RegexParserClass.ProgramUnitClass | RegexParserClass.TypeDefClass) assert item.name == 't_mod.f90' assert item.definitions == (item.source['t_mod'],) - items = item.get_items() + items = item.create_definition_items(item_cache={}) assert len(items) == 1 assert items[0].name == 't_mod' - assert len(items[0].ir.typedefs) == 1 - assert items[0].definitions == (item.source['t'],) + assert len(items[0].ir.typedefs) == 2 + assert items[0].definitions == ( + item.source['t_proc'], + item.source['my_way'], + item.source['t1'], + item.source['t'] + ) - # Filter items when calling get_items() - assert not item.get_items(only=SubroutineItem) - items = item.get_items(only=ModuleItem) + # Filter items when calling create_definition_items() + assert not item.create_definition_items(only=ProcedureItem, item_cache={}) + items = item.create_definition_items(only=ModuleItem, item_cache={}) assert len(items) == 1 + assert isinstance(items[0], ModuleItem) assert items[0].ir == item.source['t_mod'] @@ -83,27 +116,97 @@ def get_item(path, name, parser_classes): source = Sourcefile.from_file(filepath, frontend=REGEX, parser_classes=parser_classes) return ModuleItem(name, source=source) - # A file with simple module that contains a single subroutine + # A file with simple module that contains a single subroutine and has no dependencies on + # the module level 
item = get_item('module/a_mod.F90', 'a_mod', RegexParserClass.ProgramUnitClass) assert item.name == 'a_mod' assert item.ir is item.source['a_mod'] assert item.definitions == (item.source['a'],) + items = item.create_definition_items(item_cache={}) + assert len(items) == 1 + assert isinstance(items[0], ProcedureItem) + assert items[0].ir == item.source['a'] + + assert not item.dependencies + + # A different file with a simple module that contains a single subroutine but has an import + # dependency on the module level + item = get_item('module/b_mod.F90', 'b_mod', RegexParserClass.ProgramUnitClass) + assert item.name == 'b_mod' + assert item.ir is item.source['b_mod'] + assert item.definitions == (item.source['b'],) + + items = item.create_definition_items(item_cache={}) + assert len(items) == 1 + assert isinstance(items[0], ProcedureItem) + assert items[0].ir == item.source['b'] + + dependencies = item.dependencies + assert len(dependencies) == 1 + assert isinstance(dependencies[0], Import) + assert dependencies[0].module == 'header_mod' + + # Make sure the dependencies are also found correctly if done without parsing definitions first + item = get_item('module/b_mod.F90', 'b_mod', RegexParserClass.ProgramUnitClass) + dependencies = item.dependencies + assert len(dependencies) == 1 and dependencies[0].module == 'header_mod' -def test_subroutine_item(here): + +def test_procedure_item(here): proj = here/'sources/projBatch' def get_item(path, name, parser_classes): filepath = proj/path source = Sourcefile.from_file(filepath, frontend=REGEX, parser_classes=parser_classes) - return SubroutineItem(name, source=source) + return ProcedureItem(name, source=source) - # A file with a single subroutine definition + # A file with a single subroutine definition that calls a routine via interface block item = get_item('source/comp1.F90', '#comp1', RegexParserClass.ProgramUnitClass) assert item.name == '#comp1' assert item.ir is item.source['comp1'] assert item.definitions is () + 
assert not item.create_definition_items(item_cache={}) + + dependencies = item.dependencies + assert len(dependencies) == 5 + assert isinstance(dependencies[0], Import) + assert dependencies[0].module == 't_mod' + assert isinstance(dependencies[1], Import) + assert dependencies[1].module == 'header_mod' + assert isinstance(dependencies[2], CallStatement) + assert dependencies[2].name == 'arg%proc' + assert isinstance(dependencies[3], CallStatement) + assert dependencies[3].name == 'comp2' + assert isinstance(dependencies[4], CallStatement) + assert dependencies[4].name == 'arg%no%way' + + # A file with a single subroutine definition that calls two routines via module imports + item = get_item('source/comp2.F90', '#comp2', RegexParserClass.ProgramUnitClass) + assert item.name == '#comp2' + assert item.ir is item.source['comp2'] + assert item.definitions is () + + assert not item.create_definition_items(item_cache={}) + + dependencies = item.dependencies + assert len(dependencies) == 7 + assert isinstance(dependencies[0], Import) + assert dependencies[0].module == 't_mod' + assert isinstance(dependencies[1], Import) + assert dependencies[1].module == 'header_mod' + assert isinstance(dependencies[2], Import) + assert dependencies[2].module == 'a_mod' + assert isinstance(dependencies[3], Import) + assert dependencies[3].module == 'b_mod' + assert isinstance(dependencies[4], CallStatement) + assert dependencies[4].name == 'a' + assert isinstance(dependencies[5], CallStatement) + assert dependencies[5].name == 'b' + assert isinstance(dependencies[6], CallStatement) + assert dependencies[6].name == 'arg%yay%proc' + def test_typedef_item(here): proj = here/'sources/projBatch' @@ -118,3 +221,101 @@ def get_item(path, name, parser_classes): assert item.name == 't_mod#t' assert item.ir is item.source['t'] assert item.definitions is () + + assert not item.create_definition_items(item_cache={}) + assert item.dependencies == as_tuple(item.ir.parent.imports) + + +def 
test_item_graph(here): + """ + Build a :any:`nx.Digraph` from a dummy call hierarchy to check the incremental parsing and + discovery behaves as expected. + + Expected dependencies: + + .. code-block:: + + -------------- + --(imports)--> t_mod#t --(imports)--> tt_mod#tt + / | + comp1 --(calls)--> comp2 --(calls)--> a_mod#a + | | + | + --(calls)--> b_mod#b + | | + | + --(calls)--> tt_mod#tt%proc --(binds to) --> tt_mod#tt_proc + | + + --(calls)--> t_mod#t%proc --(binds to)--> t_mod#t_proc + | + + --(calls)--> t_mod#t%no%way --(binds to)--> t_mod#t1%way --(binds to)--> t_mod#my_way + + Additionally, ``comp`` depends on ``header_mod`` (for a kind-parameter ``k``), while + all others except ``t_mod``/``t_mod#t`` depend directly on the kind-parameter ``header_mod#k``. + + """ + proj = here/'sources/projBatch' + suffixes = ['.f90', '.F90'] + + path_list = [f for ext in suffixes for f in proj.glob(f'**/*{ext}')] + assert len(path_list) == 7 + + # Map item names to items + item_cache = {} + + # Instantiate the basic list of items (files, modules, subroutines) + for path in path_list: + relative_path = str(path.relative_to(proj)) + source = Sourcefile.from_file(path, frontend=REGEX, parser_classes=RegexParserClass.ProgramUnitClass) + file_item = FileItem(name=relative_path, source=source) + item_cache[relative_path] = file_item + item_cache.update((item.name, item) for item in file_item.create_definition_items(item_cache={})) + + # Populate a graph from a seed routine + seed = '#comp1' + full_graph = nx.DiGraph() + full_graph.add_node(item_cache[seed]) + + dependencies = item_cache[seed].create_dependency_items(item_cache=item_cache) + full_graph.add_nodes_from(dependencies) + full_graph.add_edges_from((item_cache[seed], item) for item in dependencies) + + queue = deque() + queue.extend(dependencies) + + while queue: + item = queue.popleft() + dependencies = item.create_dependency_items(item_cache=item_cache) + new_items = [i for i in dependencies if i not in full_graph] + 
if new_items: + full_graph.add_nodes_from(new_items) + queue.extend(new_items) + full_graph.add_edges_from((item, dependency) for dependency in dependencies) + + expected_dependencies = { + '#comp1': ('header_mod', 't_mod#t', '#comp2', 't_mod#t%proc', 't_mod#t%no%way'), + '#comp2': ('header_mod#k', 't_mod#t', 'a_mod#a', 'b_mod#b', 't_mod#t%yay%proc'), + 'a_mod#a': ('header_mod#k',), + 'b_mod#b': ('header_mod#k',), + 't_mod#t': ('tt_mod#tt',), + 't_mod#t%proc': ('t_mod#t_proc',), + 't_mod#t_proc': ('tt_mod#tt',), + 't_mod#t%no%way': ('t_mod#t1%way',), + 't_mod#t%yay%proc': ('tt_mod#tt%proc',), + 't_mod#t1%way': ('t_mod#my_way',), + 't_mod#my_way': ('tt_mod#tt',), + 'tt_mod#tt': ('header_mod#k',), + 'tt_mod#tt%proc': ('tt_mod#tt_proc',), + 'tt_mod#tt_proc': ('header_mod#k',), + 'header_mod': (), + 'header_mod#k': (), + } + + assert len(full_graph) == len(expected_dependencies) + assert all(key in full_graph for key in expected_dependencies) + + edges = tuple((a.name, b.name) for a, b in full_graph.edges) + for node, dependencies in expected_dependencies.items(): + for dependency in dependencies: + assert (node, dependency) in edges + assert len(edges) == sum(len(dependencies) for dependencies in expected_dependencies.values()) + + # Note: quick visualization for debugging can be done using matplotlib + # import matplotlib.pyplot as plt + # nx.draw_planar(full_graph, with_labels=True) From 22426a897570c36a0d792a73733f846ff67fd45f Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Fri, 19 May 2023 09:57:55 +0100 Subject: [PATCH 11/23] Homogenize access to typedefs and imports in program units --- loki/module.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/loki/module.py b/loki/module.py index 4bde828ff..8c4fe7ea1 100644 --- a/loki/module.py +++ b/loki/module.py @@ -294,12 +294,6 @@ def __setstate__(self, s): # Ensure that we are attaching all symbols to the newly create ``self``. 
self.rescope_symbols() - @property - def variables(self): - return tuple(flatten( - decl.symbols for decl in FindNodes(VariableDeclaration).visit(self.spec or ()) - )) - @property def definitions(self): """ From cc844f7d8571f80537c0ceffb831dc77c6626fd9 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Fri, 19 May 2023 14:34:56 +0100 Subject: [PATCH 12/23] regex: parse kind in declarations --- loki/frontend/regex.py | 7 ++++++- tests/sources/projTypeBound/typebound_item.F90 | 3 ++- tests/test_frontends.py | 7 ++++++- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/loki/frontend/regex.py b/loki/frontend/regex.py index e0dc07564..1118d1c17 100644 --- a/loki/frontend/regex.py +++ b/loki/frontend/regex.py @@ -900,7 +900,7 @@ class VariableDeclarationPattern(Pattern): def __init__(self): super().__init__( r'^(((?:type|class)[ \t]*\([ \t]*(?P\w+)[ \t]*\))|' # TYPE or CLASS keyword with typename - r'^([ \t]*(?P(logical|real|integer|complex|character))(\((kind|len)=[a-z0-9_-]+\))?[ \t]*))' + r'^([ \t]*(?P(logical|real|integer|complex|character))(?P\((kind|len)=[a-z0-9_-]+\))?[ \t]*))' r'(?:[ \t]*,[ \t]*[a-z]+(?:\((.(\(.*\))?)*?\))?)*' # Optional attributes r'(?:[ \t]*::)?' 
# Optional `::` delimiter r'[ \t]*' # Some white space @@ -932,6 +932,11 @@ def match(self, reader, parser_classes, scope): type_ = SymbolAttributes(BasicType.from_str(match['basic_type'])) assert type_ + if match['param']: + param = match['param'].strip().strip('()').split('=') + if len(param) == 1 or param[0].lower() == 'kind': + type_ = type_.clone(kind=sym.Variable(name=param[-1], scope=scope)) + variables = self._remove_quoted_string_nested_parentheses(match['variables']) # Remove dimensions variables = re.sub(r'=(?:>)?[^,]*(?=,|$)', r'', variables) # Remove initialization variables = variables.replace(' ', '').split(',') # Variable names without white space diff --git a/tests/sources/projTypeBound/typebound_item.F90 b/tests/sources/projTypeBound/typebound_item.F90 index 60c7a687a..394e47e56 100644 --- a/tests/sources/projTypeBound/typebound_item.F90 +++ b/tests/sources/projTypeBound/typebound_item.F90 @@ -53,9 +53,10 @@ subroutine driver use typebound_other, only: other => other_type implicit none + integer, parameter :: constant = 2 type(some_type), allocatable :: obj(:), obj2(:,:) type(header_type) :: header - type(other) :: other_obj, derived(2) + type(other) :: other_obj, derived(constant) real :: x integer :: i diff --git a/tests/test_frontends.py b/tests/test_frontends.py index d566030b8..9dd3d8c65 100644 --- a/tests/test_frontends.py +++ b/tests/test_frontends.py @@ -1180,6 +1180,11 @@ def test_regex_loki_69(): calls = FindNodes(CallStatement).visit(source['test'].ir) assert [call.name for call in calls] == ['RANDOM_CALL_0', 'random_call_2'] + variable_map_test = source['test'].variable_map + v_in_type = variable_map_test['v_in'].type + assert v_in_type.dtype is BasicType.REAL + assert v_in_type.kind == 'jprb' + def test_regex_variable_declaration(here): """ @@ -1189,7 +1194,7 @@ def test_regex_variable_declaration(here): source = Sourcefile.from_file(filepath, frontend=REGEX) driver = source['driver'] - assert driver.variables == ('obj', 'obj2', 
'header', 'other_obj', 'derived', 'x', 'i') + assert driver.variables == ('constant', 'obj', 'obj2', 'header', 'other_obj', 'derived', 'x', 'i') assert source['module_routine'].variables == ('m',) assert source['other_routine'].variables == ('self', 'm', 'j') assert source['routine'].variables == ('self',) From df58f87e14588f9e8c2dbc78d5c288a2d0849136 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Fri, 19 May 2023 14:39:28 +0100 Subject: [PATCH 13/23] On-demand graph building improved and more tests --- loki/bulk/item.py | 220 +++++++---- loki/scope.py | 12 + tests/sources/projBatch/module/other_mod.F90 | 11 + tests/sources/projBatch/module/t_mod.F90 | 7 +- tests/sources/projBatch/module/tt_mod.F90 | 3 + tests/sources/projBatch/source/comp1.F90 | 3 +- tests/test_batch.py | 385 ++++++++++++++----- 7 files changed, 458 insertions(+), 183 deletions(-) create mode 100644 tests/sources/projBatch/module/other_mod.F90 diff --git a/loki/bulk/item.py b/loki/bulk/item.py index de895f462..0d4096704 100644 --- a/loki/bulk/item.py +++ b/loki/bulk/item.py @@ -10,12 +10,13 @@ import sys from loki.frontend import REGEX, RegexParserClass -from loki.expression import TypedSymbol, MetaSymbol, ProcedureSymbol +from loki.expression import TypedSymbol, MetaSymbol, ProcedureSymbol, FindVariables from loki.ir import Import, CallStatement, TypeDef, ProcedureDeclaration from loki.logging import warning from loki.module import Module from loki.subroutine import Subroutine from loki.tools import as_tuple, flatten, CaseInsensitiveDict +from loki.types import DerivedType from loki.visitors import FindNodes @@ -139,9 +140,7 @@ def ir(self): def _parser_classes_from_item_type_names(self, item_type_names): item_types = [getattr(sys.modules[__name__], name) for name in item_type_names] parser_classes = [p for item_type in item_types if (p := item_type._parser_class) is not None] - if parser_classes: - return reduce(lambda x, y: x | y, parser_classes) - return None + return reduce(lambda x, 
y: x | y, parser_classes, RegexParserClass.EmptyClass) def concretize_definitions(self): parser_classes = self._parser_classes_from_item_type_names(self._defines_items) @@ -155,33 +154,43 @@ def concretize_dependencies(self): ir = ir.parent ir.make_complete(frontend=REGEX, parser_classes=self._depends_class) + @staticmethod + def _get_or_create_item(item_cls, item_name, item_cache, source=None, module_name=None): + if item_name in item_cache: + return item_cache[item_name] + + if not source: + if not module_name: + raise ValueError('Need to provide source or module_name') + if module_name not in item_cache: + raise RuntimeError(f'Module {module_name} not found in item_cache') + source = item_cache[module_name].source + + item = item_cls(item_name, source=source) + item_cache[item_name] = item + return item + def create_from_ir(self, node, item_cache): if isinstance(node, Module): item_name = node.name.lower() - items = as_tuple(item_cache.get(item_name)) - if not items: - assert node in self.source.modules - items = as_tuple(ModuleItem(item_name, source=self.source)) + items = as_tuple(self._get_or_create_item(ModuleItem, item_name, item_cache, source=self.source)) elif isinstance(node, Subroutine): item_name = f'{getattr(node.parent, "name", "")}#{node.name}'.lower() - items = as_tuple(item_cache.get(item_name)) - if not items: - assert node in self.source.all_subroutines - items = as_tuple(ProcedureItem(item_name, source=self.source)) + items = as_tuple(self._get_or_create_item(ProcedureItem, item_name, item_cache, source=self.source)) elif isinstance(node, TypeDef): item_name = f'{node.parent.name}#{node.name}'.lower() - items = as_tuple(item_cache.get(item_name)) - if not items: - assert node.parent in self.source.modules - items = as_tuple(TypeDefItem(item_name, source=self.source)) + items = as_tuple(self._get_or_create_item(TypeDefItem, item_name, item_cache, source=self.source)) elif isinstance(node, Import): # If we have a fully-qualified import (which we 
hopefully have), # we create a dependency for every imported symbol, otherwise we # depend only on the imported module - module_item = item_cache[node.module.lower()] + module_name = node.module.lower() + if module_name not in item_cache: + raise RuntimeError(f'Module {module_name} not found in item_cache') + module_item = item_cache[module_name] if node.symbols: module_definitions = { item.local_name: item for item in module_item.create_definition_items(item_cache=item_cache) @@ -196,66 +205,89 @@ def create_from_ir(self, node, item_cache): # This is a typebound procedure call, we are only resolving # to the type member by mapping the local name to the type name type_name = node.name.parents[0].type.dtype.name.lower() - # Find the module where the type is defined - if type_name in node.name.scope.imported_symbols: - for imprt in node.name.scope.imports: - if type_name in imprt.symbols: - module_name = imprt.module.lower() - break + # Find the module where the type is defined: + scope = node.name.scope + # 1. Import in current scope + imprt = scope.import_map.get(type_name) + # 2. Import in parent scope + if not imprt and scope.parent: + imprt = scope.parent.import_map.get(type_name) + if imprt: + module_name = imprt.module + # 3. Declared in parent scope + elif scope.parent and type_name in scope.parent.typedef_map: + module_name = scope.parent.name + # 4. 
Unknown else: - # TODO: Resolve call to type-bound procedure - raise NotImplementedError() + raise RuntimeError(f'Unable to find the module declaring {type_name}') + item_name = f'{module_name}#{type_name}%{"%".join(node.name.name_parts[1:])}'.lower() - items = as_tuple(item_cache.get(item_name)) - if not items: - module_item = item_cache[module_name] - items = as_tuple(ProcedureBindingItem(item_name, source=module_item.source)) + items = as_tuple(self._get_or_create_item( + ProcedureBindingItem, item_name, item_cache, module_name=module_name + )) elif procedure_name in self.ir.imported_symbols: # This is a call to a module procedure which has been imported via # a fully qualified import - for imprt in self.ir.imports: - if procedure_name in imprt.symbols: - # TODO: Handle renaming - module_name = imprt.module.lower() - break + module_name = self.ir.import_map.get(procedure_name).module + item_name = f'{module_name}#{procedure_name}'.lower() + items = as_tuple(self._get_or_create_item( + ProcedureItem, item_name, item_cache, module_name=module_name + )) + elif self.ir.parent and procedure_name in self.ir.parent.imported_symbols: + # This is a call to a module procedure which has been imported via + # a fully qualified import in the parent scope + module_name = self.ir.parent.import_map.get(procedure_name).module item_name = f'{module_name}#{procedure_name}'.lower() - items = as_tuple(item_cache.get(item_name)) - if not items: - module_item = item_cache[module_name] - items = as_tuple(ProcedureBindingItem(item_name, source=module_item.source)) + items = as_tuple(self._get_or_create_item( + ProcedureItem, item_name, item_cache, module_name=module_name + )) + elif procedure_name in (intf_map := self.ir.interface_symbols): # TODO: Handle declaration via interface raise NotImplementedError() else: + # This is a call to a subroutine declared via header-included interface item_name = f'#{procedure_name}'.lower() - items = (item_cache[item_name],) + items = 
as_tuple(item_cache[item_name]) elif isinstance(node, ProcedureSymbol): - # This is a procedure binding + # This is a procedure binding, presumably to a routine that is + # bound to a derived type that is nested into another derived type assert '%' in node.name - type_name = node.parent.type.dtype.name + type_name = node.parents[0].type.dtype.name.lower() proc_name = '%'.join(node.name_parts[1:]) - module = node.scope.parent - if type_name in module.typedefs: - module_name = module.name.lower() + + # Find the module where the type is defined: + scope = node.scope + # 1. Import in current scope + if hasattr(scope, 'import_map'): + imprt = scope.import_map.get(type_name) + else: + imprt = None + # 2. Import in parent scope + if not imprt and scope.parent: + imprt = scope.parent.import_map.get(type_name) + if imprt: + module_name = imprt.module + # 3. Declared in parent scope + elif scope.parent and type_name in scope.parent.typedef_map: + module_name = scope.parent.name + # 4. Unknown else: - for imprt in module.imports: - if type_name in imprt.symbols: - module_name = imprt.module.lower() - break + raise RuntimeError(f'Unable to find the module declaring {type_name}') + item_name = f'{module_name}#{type_name}%{proc_name}'.lower() - items = as_tuple(item_cache.get(item_name)) - if not items: - module_item = item_cache[module_name] - items = as_tuple(ProcedureBindingItem(item_name, source=module_item.source)) + items = as_tuple(self._get_or_create_item( + ProcedureBindingItem, item_name, item_cache, module_name=module_name + )) elif isinstance(node, (TypedSymbol, MetaSymbol)): # This is a global variable item_name = f'{node.scope.name}#{node.name}'.lower() - items = as_tuple(item_cache.get(item_name)) - if not items: - module_item = item_cache[node.scope.name.lower()] - items = as_tuple(GlobalVariableItem(item_name, source=module_item.source)) + items = as_tuple(self._get_or_create_item( + GlobalVariableItem, item_name, item_cache, module_name=node.scope.name + )) + 
else: raise ValueError(f'{node} has an unsupported node type {type(node)}') @@ -280,7 +312,7 @@ def create_dependency_items(self, item_cache, only=None): if only: items = tuple(item for item in items if isinstance(item, only)) - return items + return tuple(dict.fromkeys(items)) def clear_cached_property(self, property_name): """ @@ -294,14 +326,17 @@ def scope_name(self): """ The name of this item's scope """ - return self.name[:self.name.index('#')] or None + pos = self.name.find('#') + if pos == -1: + return None + return self.name[:pos] @property def local_name(self): """ The item name without the scope """ - return self.name[self.name.index('#')+1:] + return self.name[self.name.find('#')+1:] @cached_property def scope(self): @@ -658,14 +693,10 @@ def definitions(self): def ir(self): return self.source - @property - def local_name(self): - return self.name - class ModuleItem(Item): - _parser_class = RegexParserClass.ProgramUnitClass #| RegexParserClass.ImportClass + _parser_class = RegexParserClass.ProgramUnitClass _defines_items = ('ProcedureItem', 'TypeDefItem', 'GlobalVariableItem') _depends_class = RegexParserClass.ImportClass @@ -687,26 +718,53 @@ class ProcedureItem(Item): _parser_class = RegexParserClass.ProgramUnitClass _depends_class = ( - RegexParserClass.ImportClass | RegexParserClass.InterfaceClass | + RegexParserClass.ImportClass | RegexParserClass.InterfaceClass | RegexParserClass.TypeDefClass | RegexParserClass.DeclarationClass | RegexParserClass.CallClass ) @property def _dependencies(self): - calls = tuple(FindNodes(CallStatement).visit(self.ir.ir)) + calls = tuple({call.name.name: call for call in FindNodes(CallStatement).visit(self.ir.ir)}.values()) imports = self.ir.imports - if self.ir.parent: - imports += self.ir.parent.imports - return as_tuple(imports) + calls + typedefs = () + + # Create dependencies on type definitions that may have been declared in or + # imported via the module scope + if self.scope: + type_names = [ + dtype.name for 
var in self.ir.variables + if isinstance((dtype := var.type.dtype), DerivedType) + ] + if type_names: + typedef_map = self.scope.typedef_map + import_map = self.scope.import_map + typedefs += tuple(typedef for type_name in type_names if (typedef := typedef_map.get(type_name))) + imports += tuple(imprt for type_name in type_names if (imprt := import_map.get(type_name))) + return imports + typedefs + calls class TypeDefItem(Item): - _parser_class = RegexParserClass.TypeDefClass #| RegexParserClass.DeclarationClass + _parser_class = RegexParserClass.TypeDefClass + _depends_class = RegexParserClass.DeclarationClass @property def _dependencies(self): - return as_tuple(self.ir.parent.imports) + # We restrict dependencies to derived types used in the typedef + imports = () + typedefs = () + + type_names = [ + dtype.name for var in self.ir.variables + if isinstance((dtype := var.type.dtype), DerivedType) + ] + if type_names: + typedef_map = self.scope.typedef_map + import_map = self.scope.import_map + typedefs = tuple(typedef for type_name in type_names if (typedef := typedef_map.get(type_name))) + imports = tuple(imprt for type_name in type_names if (imprt := import_map.get(type_name))) + + return tuple(dict.fromkeys(imports + typedefs)) class InterfaceItem(Item): @@ -718,6 +776,14 @@ class GlobalVariableItem(Item): _parser_class = RegexParserClass.DeclarationClass + @property + def ir(self): + local_name = self.local_name + for decl in self.scope.declarations: + if local_name in decl.symbols: + return decl + raise RuntimeError(f'Declaration for {local_name} cannot be found in {self.scope_name}') + class SubroutineItem(Item): """ @@ -832,7 +898,7 @@ class ProcedureBindingItem(Item): """ _parser_class = RegexParserClass.CallClass - # _depends_items = ('ProcedureItem',) + _depends_class = RegexParserClass.DeclarationClass @property def ir(self): @@ -840,18 +906,12 @@ def ir(self): typedef = self.source[name_parts[0]] for decl in typedef.declarations: if name_parts[1] in 
decl.symbols: - return decl + return decl.symbols[decl.symbols.index(name_parts[1])] raise RuntimeError(f'Declaration for {self.name} not found') - @property - def symbol(self): - local_name = self.local_name.split('%')[1] - decl = self.ir - return decl.symbols[decl.symbols.index(local_name)] - @property def _dependencies(self): - symbol = self.symbol + symbol = self.ir name_parts = self.local_name.split('%') if len(name_parts) == 2: # TODO: generic bindings diff --git a/loki/scope.py b/loki/scope.py index 50d08fd91..6c79246d9 100644 --- a/loki/scope.py +++ b/loki/scope.py @@ -283,6 +283,18 @@ def rescope_symbols(self): from loki.expression import AttachScopes # pylint: disable=import-outside-toplevel,cyclic-import AttachScopes().visit(self) + def make_complete(self, **frontend_args): + """ + Trigger a re-parse of the object if incomplete to produce a full Loki IR + + See :any:`ProgramUnit.make_complete` for more details. + + This method relays the call only to the :attr:`parent`. + """ + if hasattr(super(), 'make_complete'): + super().make_complete(**frontend_args) + self.parent.make_complete(**frontend_args) + def clone(self, **kwargs): """ Create a copy of the scope object with the option to override individual diff --git a/tests/sources/projBatch/module/other_mod.F90 b/tests/sources/projBatch/module/other_mod.F90 new file mode 100644 index 000000000..fe04e9225 --- /dev/null +++ b/tests/sources/projBatch/module/other_mod.F90 @@ -0,0 +1,11 @@ +module other_mod + use tt_mod, only: tt + use b_mod, only: b + implicit none +contains + subroutine mod_proc(arg) + type(tt), intent(inout) :: arg + call arg%proc() + call b(arg%indirection) + end subroutine mod_proc +end module other_mod diff --git a/tests/sources/projBatch/module/t_mod.F90 b/tests/sources/projBatch/module/t_mod.F90 index 358ab2709..abf677382 100644 --- a/tests/sources/projBatch/module/t_mod.F90 +++ b/tests/sources/projBatch/module/t_mod.F90 @@ -1,5 +1,6 @@ module t_mod use tt_mod, only: tt + use a_mod, 
only: a implicit none type t1 @@ -16,9 +17,13 @@ module t_mod contains subroutine t_proc(this) class(t), intent(inout) :: this + call a(this%yay%other) + call this%yay%proc() end subroutine t_proc - subroutine my_way(this) + subroutine my_way(this, recurse) class(t1), intent(inout) :: this + logical, intent(in) :: recurse + if (recurse) call this%way(.false.) end subroutine my_way end module t_mod diff --git a/tests/sources/projBatch/module/tt_mod.F90 b/tests/sources/projBatch/module/tt_mod.F90 index 211ccabdd..c043a50e6 100644 --- a/tests/sources/projBatch/module/tt_mod.F90 +++ b/tests/sources/projBatch/module/tt_mod.F90 @@ -2,8 +2,11 @@ module tt_mod use header_mod, only: k implicit none + integer, parameter :: nclv = 5 + type tt real(kind=k), allocatable :: indirection(:) + real(kind=k) :: other(nclv) contains procedure :: proc => tt_proc end type tt diff --git a/tests/sources/projBatch/source/comp1.F90 b/tests/sources/projBatch/source/comp1.F90 index ae01569c2..0d595e659 100644 --- a/tests/sources/projBatch/source/comp1.F90 +++ b/tests/sources/projBatch/source/comp1.F90 @@ -7,5 +7,6 @@ subroutine comp1 (arg, val) #include "comp2.intfb.h" call arg%proc() call comp2(arg, val) - call arg%no%way() + call comp2(arg, val) ! Twice to check we're not duplicating dependencies + call arg%no%way(.true.) 
end subroutine comp1 diff --git a/tests/test_batch.py b/tests/test_batch.py index ea7b5d5a9..a7c7aa62f 100644 --- a/tests/test_batch.py +++ b/tests/test_batch.py @@ -12,9 +12,9 @@ import pytest from loki import ( - HAVE_FP, HAVE_OFP, REGEX, RegexParserClass, as_tuple, + HAVE_FP, HAVE_OFP, REGEX, RegexParserClass, as_tuple, CaseInsensitiveDict, FileItem, ModuleItem, ProcedureItem, TypeDefItem, ProcedureBindingItem, GlobalVariableItem, - Sourcefile, Section, RawSource, Import, CallStatement + Sourcefile, Section, RawSource, Import, CallStatement, Scalar ) pytestmark = pytest.mark.skipif(not HAVE_FP and not HAVE_OFP, reason='Fparser and OFP not available') @@ -25,20 +25,52 @@ def fixture_here(): return Path(__file__).parent -def test_file_item(here): - proj = here/'sources/projBatch' +@pytest.fixture(scope='module', name='comp1_expected_dependencies') +def fixture_comp1_expected_dependencies(): + return { + '#comp1': ('header_mod', 't_mod#t', '#comp2', 't_mod#t%proc', 't_mod#t%no%way'), + '#comp2': ('header_mod#k', 't_mod#t', 'a_mod#a', 'b_mod#b', 't_mod#t%yay%proc'), + 'a_mod#a': ('header_mod#k',), + 'b_mod#b': (), + 't_mod#t': ('tt_mod#tt', 't_mod#t1'), + 't_mod#t1': (), + 't_mod#t%proc': ('t_mod#t_proc',), + 't_mod#t_proc': ('t_mod#t', 'a_mod#a', 't_mod#t%yay%proc'), + 't_mod#t%no%way': ('t_mod#t1%way',), + 't_mod#t%yay%proc': ('tt_mod#tt%proc',), + 't_mod#t1%way': ('t_mod#my_way',), + 't_mod#my_way': ('t_mod#t1', 't_mod#t1%way'), + 'tt_mod#tt': (), + 'tt_mod#tt%proc': ('tt_mod#tt_proc',), + 'tt_mod#tt_proc': ('tt_mod#tt',), + 'header_mod': (), + 'header_mod#k': (), + } + + + +def get_item(cls, path, name, parser_classes): + source = Sourcefile.from_file(path, frontend=REGEX, parser_classes=parser_classes) + return cls(name, source=source) - def get_item(path, parser_classes): - filepath = proj/path - return FileItem( - filepath.name.lower(), - Sourcefile.from_file(filepath, frontend=REGEX, parser_classes=parser_classes) - ) + +def test_file_item1(here): + proj = 
here/'sources/projBatch' # A file with simple module that contains a single subroutine - item = get_item('module/a_mod.F90', RegexParserClass.EmptyClass) - assert item.name == 'a_mod.f90' + item = get_item(FileItem, proj/'module/a_mod.F90', 'module/a_mod.F90', RegexParserClass.EmptyClass) + assert item.name == 'module/a_mod.F90' + assert item.local_name == item.name + assert item.scope_name is None + assert not item.scope assert item.ir is item.source + assert str(item) == 'loki.bulk.FileItem' + + # A few checks on the item comparison + assert item == 'module/a_mod.F90' + assert item != FileItem('some_name', source=item.source) + assert item == FileItem(item.name, source=item.source) + # The file is not parsed at all assert not item.source.definitions assert isinstance(item.source.ir, Section) @@ -50,11 +82,12 @@ def get_item(path, parser_classes): assert len(item.source.definitions) == 1 items = item.create_definition_items(item_cache={}) assert len(items) == 1 + assert items[0] != None # pylint: disable=singleton-comparison # (intentionally trigger __eq__ here) assert items[0].name == 'a_mod' assert items[0].definitions == (item.source['a'],) - item = get_item('module/a_mod.F90', RegexParserClass.ProgramUnitClass) - assert item.name == 'a_mod.f90' + item = get_item(FileItem, proj/'module/a_mod.F90', 'module/a_mod.F90', RegexParserClass.ProgramUnitClass) + assert item.name == 'module/a_mod.F90' assert item.definitions == (item.source['a_mod'],) assert item.ir is item.source items = item.create_definition_items(item_cache={}) @@ -62,9 +95,13 @@ def get_item(path, parser_classes): assert items[0].name == 'a_mod' assert items[0].definitions == (item.source['a'],) + +def test_file_item2(here): + proj = here/'sources/projBatch' + # A file with a simple module that contains a single typedef - item = get_item('module/t_mod.F90', RegexParserClass.ProgramUnitClass) - assert item.name == 't_mod.f90' + item = get_item(FileItem, proj/'module/t_mod.F90', 'module/t_mod.F90', 
RegexParserClass.ProgramUnitClass) + assert item.name == 'module/t_mod.F90' assert item.definitions == (item.source['t_mod'],) items = item.create_definition_items(item_cache={}) @@ -77,16 +114,23 @@ def get_item(path, parser_classes): assert items[0].definitions == ( items[0].ir['t_proc'], items[0].ir['my_way'], - items[0].ir.typedefs['t1'], - items[0].ir.typedefs['t'] + items[0].ir.typedef_map['t1'], + items[0].ir.typedef_map['t'] ) # Files don't have dependencies (direct dependencies, anyway) assert item.dependencies is () + +def test_file_item3(here): + proj = here/'sources/projBatch' + # The same file but with typedefs parsed from the get-go - item = get_item('module/t_mod.F90', RegexParserClass.ProgramUnitClass | RegexParserClass.TypeDefClass) - assert item.name == 't_mod.f90' + item = get_item( + FileItem, proj/'module/t_mod.F90', 'module/t_mod.F90', + RegexParserClass.ProgramUnitClass | RegexParserClass.TypeDefClass + ) + assert item.name == 'module/t_mod.F90' assert item.definitions == (item.source['t_mod'],) items = item.create_definition_items(item_cache={}) @@ -108,18 +152,15 @@ def get_item(path, parser_classes): assert items[0].ir == item.source['t_mod'] -def test_module_item(here): +def test_module_item1(here): proj = here/'sources/projBatch' - def get_item(path, name, parser_classes): - filepath = proj/path - source = Sourcefile.from_file(filepath, frontend=REGEX, parser_classes=parser_classes) - return ModuleItem(name, source=source) - # A file with simple module that contains a single subroutine and has no dependencies on # the module level - item = get_item('module/a_mod.F90', 'a_mod', RegexParserClass.ProgramUnitClass) + item = get_item(ModuleItem, proj/'module/a_mod.F90', 'a_mod', RegexParserClass.ProgramUnitClass) assert item.name == 'a_mod' + assert item == 'a_mod' + assert str(item) == 'loki.bulk.ModuleItem' assert item.ir is item.source['a_mod'] assert item.definitions == (item.source['a'],) @@ -130,9 +171,13 @@ def get_item(path, name, 
parser_classes): assert not item.dependencies + +def test_module_item2(here): + proj = here/'sources/projBatch' + # A different file with a simple module that contains a single subroutine but has an import # dependency on the module level - item = get_item('module/b_mod.F90', 'b_mod', RegexParserClass.ProgramUnitClass) + item = get_item(ModuleItem, proj/'module/b_mod.F90', 'b_mod', RegexParserClass.ProgramUnitClass) assert item.name == 'b_mod' assert item.ir is item.source['b_mod'] assert item.definitions == (item.source['b'],) @@ -147,23 +192,24 @@ def get_item(path, name, parser_classes): assert isinstance(dependencies[0], Import) assert dependencies[0].module == 'header_mod' + +def test_module_item3(here): + proj = here/'sources/projBatch' + # Make sure the dependencies are also found correctly if done without parsing definitions first - item = get_item('module/b_mod.F90', 'b_mod', RegexParserClass.ProgramUnitClass) + item = get_item(ModuleItem, proj/'module/b_mod.F90', 'b_mod', RegexParserClass.ProgramUnitClass) dependencies = item.dependencies assert len(dependencies) == 1 and dependencies[0].module == 'header_mod' -def test_procedure_item(here): +def test_procedure_item1(here): proj = here/'sources/projBatch' - def get_item(path, name, parser_classes): - filepath = proj/path - source = Sourcefile.from_file(filepath, frontend=REGEX, parser_classes=parser_classes) - return ProcedureItem(name, source=source) - # A file with a single subroutine definition that calls a routine via interface block - item = get_item('source/comp1.F90', '#comp1', RegexParserClass.ProgramUnitClass) + item = get_item(ProcedureItem, proj/'source/comp1.F90', '#comp1', RegexParserClass.ProgramUnitClass) assert item.name == '#comp1' + assert item == '#comp1' + assert str(item) == 'loki.bulk.ProcedureItem<#comp1>' assert item.ir is item.source['comp1'] assert item.definitions is () @@ -182,8 +228,34 @@ def get_item(path, name, parser_classes): assert isinstance(dependencies[4], 
CallStatement) assert dependencies[4].name == 'arg%no%way' + # We need to have suitable dependency modules in the cache to spawn the dependency items + item_cache = {item.name: item} + item_cache = { + (i := get_item(ModuleItem, proj/path, name, RegexParserClass.ProgramUnitClass)).name: i + for path, name in [ + ('module/t_mod.F90', 't_mod'), ('source/comp2.F90', '#comp2'), ('headers/header_mod.F90', 'header_mod') + ] + } + + # To ensure any existing items from the item_cache are re-used, we instantiate one for + # the procedure binding + t_mod_t_proc = get_item( + ProcedureBindingItem, proj/'module/t_mod.F90', 't_mod#t%proc', + RegexParserClass.ProgramUnitClass | RegexParserClass.TypeDefClass | RegexParserClass.DeclarationClass + ) + item_cache[t_mod_t_proc.name] = t_mod_t_proc + + items = item.create_dependency_items(item_cache=item_cache) + assert items == ('t_mod#t', 'header_mod', 't_mod#t%proc', '#comp2', 't_mod#t%no%way') + assert item_cache[t_mod_t_proc.name] is t_mod_t_proc + assert items[2] is t_mod_t_proc + + +def test_procedure_item2(here): + proj = here/'sources/projBatch' + # A file with a single subroutine definition that calls two routines via module imports - item = get_item('source/comp2.F90', '#comp2', RegexParserClass.ProgramUnitClass) + item = get_item(ProcedureItem, proj/'source/comp2.F90', '#comp2', RegexParserClass.ProgramUnitClass) assert item.name == '#comp2' assert item.ir is item.source['comp2'] assert item.definitions is () @@ -207,77 +279,204 @@ def get_item(path, name, parser_classes): assert isinstance(dependencies[6], CallStatement) assert dependencies[6].name == 'arg%yay%proc' + # We need to have suitable dependency modules in the cache to spawn the dependency items + item_cache = {item.name: item} + item_cache = { + (i := get_item(ModuleItem, proj/path, name, RegexParserClass.ProgramUnitClass)).name: i + for path, name in [ + ('module/t_mod.F90', 't_mod'), ('module/a_mod.F90', 'a_mod'), + ('module/b_mod.F90', 'b_mod'), 
('headers/header_mod.F90', 'header_mod') + ] + } + items = item.create_dependency_items(item_cache=item_cache) + assert items == ('t_mod#t', 'header_mod#k', 'a_mod#a', 'b_mod#b', 't_mod#t%yay%proc') + + # Does it still work if we call it again? + assert items == item.create_dependency_items(item_cache=item_cache) -def test_typedef_item(here): + +def test_procedure_item3(here): + proj = here/'sources/projBatch' + + # A file with a single subroutine declared in a module that calls a typebound procedure + # where the type is imported via an import statement in the module scope + item = get_item( + ProcedureItem, proj/'module/other_mod.F90', 'other_mod#mod_proc', + RegexParserClass.ProgramUnitClass + ) + dependencies = item.dependencies + assert len(dependencies) == 3 + assert dependencies[0].module == 'tt_mod' + assert dependencies[1].name == 'arg%proc' + assert dependencies[2].name == 'b' + + item_cache = { + item.name: item, + 'tt_mod': get_item(ModuleItem, proj/'module/tt_mod.F90', 'tt_mod', RegexParserClass.ProgramUnitClass), + 'b_mod': get_item(ModuleItem, proj/'module/b_mod.F90', 'b_mod', RegexParserClass.ProgramUnitClass) + } + assert item.create_dependency_items(item_cache=item_cache) == ('tt_mod#tt', 'tt_mod#tt%proc', 'b_mod#b') + + +def test_procedure_item4(here): proj = here/'sources/projBatch' - def get_item(path, name, parser_classes): - filepath = proj/path - source = Sourcefile.from_file(filepath, frontend=REGEX, parser_classes=parser_classes) - return TypeDefItem(name, source=source) + # A routine with a typebound procedure call where the typedef is in the same module + item = get_item( + ProcedureItem, proj/'module/t_mod.F90', 't_mod#my_way', RegexParserClass.ProgramUnitClass + ) + dependencies = item.dependencies + assert len(dependencies) == 2 + assert dependencies[0].name == 't1' + assert dependencies[1].name == 'this%way' + + item_cache = { + item.name: item, + 't_mod': ModuleItem('t_mod', source=item.source) + } + items = 
item.create_dependency_items(item_cache=item_cache) + assert items == ('t_mod#t1', 't_mod#t1%way') + + +def test_typedef_item(here): + proj = here/'sources/projBatch' - # A file with a single type definition - item = get_item('module/t_mod.F90', 't_mod#t', RegexParserClass.ProgramUnitClass | RegexParserClass.TypeDefClass) + # A file with multiple type definitions, of which we pick one + item = get_item( + TypeDefItem, proj/'module/t_mod.F90', 't_mod#t', + RegexParserClass.ProgramUnitClass | RegexParserClass.TypeDefClass + ) assert item.name == 't_mod#t' + assert str(item) == 'loki.bulk.TypeDefItem' assert item.ir is item.source['t'] assert item.definitions is () assert not item.create_definition_items(item_cache={}) - assert item.dependencies == as_tuple(item.ir.parent.imports) + assert item.dependencies == (item.scope.import_map['tt'], item.ir.parent['t1']) + item_cache = CaseInsensitiveDict() + item_cache[item.name] = item + with pytest.raises(RuntimeError): + item.create_dependency_items(item_cache=item_cache) + + # Need to add the module of the dependent type + item_cache['tt_mod'] = get_item( + ModuleItem, proj/'module/tt_mod.F90', 'tt_mod', RegexParserClass.ProgramUnitClass + ) + assert 'tt_mod#tt' not in item_cache + assert 't_mod#t1' not in item_cache + items = item.create_dependency_items(item_cache=item_cache) + assert 'tt_mod#tt' in item_cache + assert 't_mod#t1' in item_cache + assert items == (item_cache['tt_mod#tt'], item_cache['t_mod#t1']) + assert all(isinstance(i, TypeDefItem) for i in items) + assert not items[0].dependencies -def test_item_graph(here): - """ - Build a :any:`nx.Digraph` from a dummy call hierarchy to check the incremental parsing and - discovery behaves as expected. - Expected dependencies: +def test_interface_item(here): + pass - .. 
code-block:: - + -------------- + --(imports)--> t_mod#t --(imports)--> tt_mod#tt - / | - comp1 --(calls)--> comp2 --(calls)--> a_mod#a - | | - | + --(calls)--> b_mod#b - | | - | + --(calls)--> tt_mod#tt%proc --(binds to) --> tt_mod#tt_proc - | - + --(calls)--> t_mod#t%proc --(binds to)--> t_mod#t_proc - | - + --(calls)--> t_mod#t%no%way --(binds to)--> t_mod#t1%way --(binds to)--> t_mod#my_way - Additionally, ``comp`` depends on ``header_mod`` (for a kind-parameter ``k``), while - all others except ``t_mod``/``t_mod#t`` depend directly on the kind-parameter ``header_mod#k``. +def test_global_variable_item(here): + proj = here/'sources/projBatch' + # A file with a global parameter definition + item = get_item( + GlobalVariableItem, proj/'headers/header_mod.F90', 'header_mod#k', + RegexParserClass.ProgramUnitClass | RegexParserClass.DeclarationClass + ) + assert item.name == 'header_mod#k' + assert str(item) == 'loki.bulk.GlobalVariableItem' + assert item.ir == item.source['header_mod'].declarations[0] + assert item.definitions is () + assert not item.create_definition_items(item_cache={}) + assert item.dependencies is () + assert not item.create_dependency_items(item_cache={}) + + +def test_procedure_binding_item1(here): + proj = here/'sources/projBatch' + parser_classes = ( + RegexParserClass.ProgramUnitClass | RegexParserClass.TypeDefClass | RegexParserClass.DeclarationClass + ) + + # A typedef with a procedure binding as well as nested types that have in turn procedure bindings + + # 1. 
A direct procedure binding + item = get_item(ProcedureBindingItem, proj/'module/t_mod.F90', 't_mod#t%proc', parser_classes) + assert item.name == 't_mod#t%proc' + assert str(item) == 'loki.bulk.ProcedureBindingItem' + assert item.ir is item.source['t'].variable_map['proc'] + assert item.definitions is () + assert not item.create_definition_items(item_cache={}) + assert item.dependencies == as_tuple(item.source['t_proc']) + items = item.create_dependency_items(item_cache={}) + assert len(items) == 1 + assert isinstance(items[0], ProcedureItem) + assert items[0].ir is item.source['t_proc'] + + +def test_procedure_binding_item2(here): + proj = here/'sources/projBatch' + parser_classes = ( + RegexParserClass.ProgramUnitClass | RegexParserClass.TypeDefClass | RegexParserClass.DeclarationClass + ) + + # 2. An indirect procedure binding via a nested type member, where the type is declared in the same module + item = get_item(ProcedureBindingItem, proj/'module/t_mod.F90', 't_mod#t%no%way', parser_classes) + assert item.name == 't_mod#t%no%way' + assert isinstance(item.ir, Scalar) + assert item.definitions is () + assert not item.create_definition_items(item_cache={}) + assert item.dependencies == ('no%way',) + + item_cache = {item.name: item} + with pytest.raises(RuntimeError): + # Fails because item_cache does not contain the relevant module + item.create_dependency_items(item_cache=item_cache) + + item_cache['t_mod'] = ModuleItem('t_mod', source=item.source) + items = item.create_dependency_items(item_cache=item_cache) + assert len(items) == 1 + assert isinstance(items[0], ProcedureBindingItem) + assert items[0].name == 't_mod#t1%way' + assert 't_mod#t1%way' in item_cache + + assert 't_mod#my_way' not in item_cache + next_items = items[0].create_dependency_items(item_cache=item_cache) + assert len(next_items) == 1 + assert isinstance(next_items[0], ProcedureItem) + assert next_items[0].ir is item.source['my_way'] + assert 't_mod#my_way' in item_cache + + +def 
test_item_graph(here, comp1_expected_dependencies): + """ + Build a :any:`nx.Digraph` from a dummy call hierarchy to check the incremental parsing and + discovery behaves as expected. """ proj = here/'sources/projBatch' suffixes = ['.f90', '.F90'] path_list = [f for ext in suffixes for f in proj.glob(f'**/*{ext}')] - assert len(path_list) == 7 + assert len(path_list) == 8 # Map item names to items - item_cache = {} + item_cache = CaseInsensitiveDict() # Instantiate the basic list of items (files, modules, subroutines) for path in path_list: relative_path = str(path.relative_to(proj)) - source = Sourcefile.from_file(path, frontend=REGEX, parser_classes=RegexParserClass.ProgramUnitClass) - file_item = FileItem(name=relative_path, source=source) + file_item = get_item(FileItem, path, relative_path, RegexParserClass.ProgramUnitClass) item_cache[relative_path] = file_item - item_cache.update((item.name, item) for item in file_item.create_definition_items(item_cache={})) + item_cache.update((item.name, item) for item in file_item.create_definition_items(item_cache=item_cache)) # Populate a graph from a seed routine seed = '#comp1' + queue = deque() full_graph = nx.DiGraph() full_graph.add_node(item_cache[seed]) - - dependencies = item_cache[seed].create_dependency_items(item_cache=item_cache) - full_graph.add_nodes_from(dependencies) - full_graph.add_edges_from((item_cache[seed], item) for item in dependencies) - - queue = deque() - queue.extend(dependencies) + queue.append(item_cache[seed]) while queue: item = queue.popleft() @@ -288,34 +487,18 @@ def test_item_graph(here): queue.extend(new_items) full_graph.add_edges_from((item, dependency) for dependency in dependencies) - expected_dependencies = { - '#comp1': ('header_mod', 't_mod#t', '#comp2', 't_mod#t%proc', 't_mod#t%no%way'), - '#comp2': ('header_mod#k', 't_mod#t', 'a_mod#a', 'b_mod#b', 't_mod#t%yay%proc'), - 'a_mod#a': ('header_mod#k',), - 'b_mod#b': ('header_mod#k',), - 't_mod#t': ('tt_mod#tt',), - 
't_mod#t%proc': ('t_mod#t_proc',), - 't_mod#t_proc': ('tt_mod#tt',), - 't_mod#t%no%way': ('t_mod#t1%way',), - 't_mod#t%yay%proc': ('tt_mod#tt%proc',), - 't_mod#t1%way': ('t_mod#my_way',), - 't_mod#my_way': ('tt_mod#tt',), - 'tt_mod#tt': ('header_mod#k',), - 'tt_mod#tt%proc': ('tt_mod#tt_proc',), - 'tt_mod#tt_proc': ('header_mod#k',), - 'header_mod': (), - 'header_mod#k': (), - } - - assert len(full_graph) == len(expected_dependencies) - assert all(key in full_graph for key in expected_dependencies) + assert set(full_graph) == set(comp1_expected_dependencies) + assert all(key in full_graph for key in comp1_expected_dependencies) edges = tuple((a.name, b.name) for a, b in full_graph.edges) - for node, dependencies in expected_dependencies.items(): + for node, dependencies in comp1_expected_dependencies.items(): for dependency in dependencies: assert (node, dependency) in edges - assert len(edges) == sum(len(dependencies) for dependencies in expected_dependencies.values()) + assert len(edges) == sum(len(dependencies) for dependencies in comp1_expected_dependencies.values()) # Note: quick visualization for debugging can be done using matplotlib - # import matplotlib.pyplot as plt - # nx.draw_planar(full_graph, with_labels=True) + # import matplotlib.pyplot as plt + # nx.draw_planar(full_graph, with_labels=True) + # plt.show() + # # -or- + # plt.savefig('test_item_graph.png') From 63394bef6f3f617ea8536d201a65509bebb277de Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Fri, 19 May 2023 17:27:20 +0100 Subject: [PATCH 14/23] Add a rudimentary SGraph implementation --- loki/bulk/scheduler.py | 94 ++++++++++++++++++++++- tests/sources/projBatch/module/t_mod.F90 | 2 +- tests/test_batch.py | 97 +++++++++++++++++++++++- 3 files changed, 189 insertions(+), 4 deletions(-) diff --git a/loki/bulk/scheduler.py b/loki/bulk/scheduler.py index 52819df62..f1e3171eb 100644 --- a/loki/bulk/scheduler.py +++ b/loki/bulk/scheduler.py @@ -21,7 +21,7 @@ from loki.module import Module 
-__all__ = ['Scheduler', 'SchedulerConfig'] +__all__ = ['Scheduler', 'SchedulerConfig', 'SGraph'] class SchedulerConfig: @@ -788,3 +788,95 @@ def write_cmake_plan(self, filepath, mode, buildpath, rootpath): s_remove = '\n'.join(f' {s}' for s in sources_to_remove) f.write(f'set( LOKI_SOURCES_TO_REMOVE \n{s_remove}\n )\n') + + +class SGraph: + + def __init__(self, seed, item_cache): + self._graph = nx.DiGraph() + self.populate(seed, item_cache) + + def populate(self, seed, item_cache): + queue = deque() + + # Insert the seed objects + for name in as_tuple(seed): + if '#' not in name: + name = f'#{name}' + item = item_cache.get(name) + + if not item: + # We may have to create the corresponding module's definitions first + module_item = item_cache.get(name[:name.index('#')]) + if module_item: + module_item.create_definition_items(item_cache) + item = item_cache.get(name) + + if item: + self.add_node(item) + queue.append(item) + else: + debug('No item found for seed "%s"', name) + + # Populate the graph + while queue: + item = queue.popleft() + dependencies = item.create_dependency_items(item_cache=item_cache) + new_items = [item_ for item_ in dependencies if item_ not in self._graph] + if new_items: + self.add_nodes(new_items) + queue.extend(new_items) + self.add_edges((item, item_) for item_ in dependencies) + + @property + def items(self): + return tuple(self._graph.nodes) + + @property + def dependencies(self): + return tuple(self._graph.edges) + + def add_node(self, item): + self._graph.add_node(item) + + def add_nodes(self, items): + self._graph.add_nodes_from(items) + + def add_edge(self, edge): + self._graph.add_edge(edge[0], edge[1]) + + def add_edges(self, edges): + self._graph.add_edges_from(edges) + + def export_to_file(self, dotfile_path): + """ + Generate a dotfile from the current graph + + Parameters + ---------- + dotfile_path : str or pathlib.Path + Path to write the callgraph figure to. 
+ """ + try: + import graphviz as gviz # pylint: disable=import-outside-toplevel + except ImportError: + warning('[Loki] Failed to load graphviz, skipping file export generation...') + return + + path = Path(dotfile_path) + graph = gviz.Digraph(format='pdf', strict=True, graph_attr=(('rankdir', 'LR'),)) + + # Insert all nodes in the graph + style = { + 'color': 'black', 'shape': 'box', 'fillcolor': 'limegreen', 'style': 'filled' + } + for item in self.items: + graph.node(item.name.upper(), **style) + + # Insert all edges in the schedulers graph + graph.edges((a.name.upper(), b.name.upper()) for a, b in self.dependencies) + + try: + graph.render(path, view=False) + except gviz.ExecutableNotFound as e: + warning(f'[Loki] Failed to render callgraph due to graphviz error:\n {e}') diff --git a/tests/sources/projBatch/module/t_mod.F90 b/tests/sources/projBatch/module/t_mod.F90 index abf677382..b599b82f7 100644 --- a/tests/sources/projBatch/module/t_mod.F90 +++ b/tests/sources/projBatch/module/t_mod.F90 @@ -21,7 +21,7 @@ subroutine t_proc(this) call this%yay%proc() end subroutine t_proc - subroutine my_way(this, recurse) + recursive subroutine my_way(this, recurse) class(t1), intent(inout) :: this logical, intent(in) :: recurse if (recurse) call this%way(.false.) 
diff --git a/tests/test_batch.py b/tests/test_batch.py
index a7c7aa62f..dc9e965f5 100644
--- a/tests/test_batch.py
+++ b/tests/test_batch.py
@@ -8,12 +8,14 @@
 from collections import deque
 from pathlib import Path
+import re
 
 import networkx as nx
 import pytest
 
 from loki import (
-    HAVE_FP, HAVE_OFP, REGEX, RegexParserClass, as_tuple, CaseInsensitiveDict,
+    HAVE_FP, HAVE_OFP, REGEX, RegexParserClass, as_tuple, CaseInsensitiveDict, gettempdir,
     FileItem, ModuleItem, ProcedureItem, TypeDefItem, ProcedureBindingItem, GlobalVariableItem,
+    SGraph,
     Sourcefile, Section, RawSource, Import, CallStatement, Scalar
 )
@@ -48,6 +50,46 @@ def fixture_comp1_expected_dependencies():
     }
 
 
+@pytest.fixture(scope='module', name='mod_proc_expected_dependencies')
+def fixture_mod_proc_expected_dependencies():
+    return {
+        'other_mod#mod_proc': ('tt_mod#tt', 'tt_mod#tt%proc', 'b_mod#b'),
+        'tt_mod#tt': (),
+        'tt_mod#tt%proc': ('tt_mod#tt_proc',),
+        'tt_mod#tt_proc': ('tt_mod#tt',),
+        'b_mod#b': ()
+    }
+
+
+@pytest.fixture(scope='module', name='expected_dependencies')
+def fixture_expected_dependencies(comp1_expected_dependencies, mod_proc_expected_dependencies):
+    return comp1_expected_dependencies | mod_proc_expected_dependencies
+
+
+@pytest.fixture(scope='module', name='no_expected_dependencies')
+def fixture_no_expected_dependencies():
+    return {}
+
+
+class VisGraphWrapper:
+    """
+    Testing utility to parse the generated callgraph visualisation.
+    """
+
+    _re_nodes = re.compile(r'\s*\"?(?P<node>[\w%#./]+)\"? \[colo', re.IGNORECASE)
+    _re_edges = re.compile(r'\s*\"?(?P<parent>[\w%#./]+)\"? -> \"?(?P<child>[\w%#./]+)\"?', re.IGNORECASE)
+
+    def __init__(self, path):
+        self.text = Path(path).read_text()
+
+    @property
+    def nodes(self):
+        return list(self._re_nodes.findall(self.text))
+
+    @property
+    def edges(self):
+        return list(self._re_edges.findall(self.text))
+
+
 def get_item(cls, path, name, parser_classes):
     source = Sourcefile.from_file(path, frontend=REGEX, parser_classes=parser_classes)
@@ -488,7 +530,6 @@ def test_item_graph(here, comp1_expected_dependencies):
         full_graph.add_edges_from((item, dependency) for dependency in dependencies)
 
     assert set(full_graph) == set(comp1_expected_dependencies)
-    assert all(key in full_graph for key in comp1_expected_dependencies)
 
     edges = tuple((a.name, b.name) for a, b in full_graph.edges)
     for node, dependencies in comp1_expected_dependencies.items():
@@ -502,3 +543,55 @@ def test_item_graph(here, comp1_expected_dependencies):
     # plt.show()
     # # -or-
     # plt.savefig('test_item_graph.png')
+
+
+@pytest.mark.parametrize('seed,dependencies_fixture', [
+    ('#comp1', 'comp1_expected_dependencies'),
+    ('other_mod#mod_proc', 'mod_proc_expected_dependencies'),
+    (['#comp1', 'other_mod#mod_proc'], 'expected_dependencies'),
+    ('foobar', 'no_expected_dependencies')
+])
+def test_sgraph_from_seed(here, seed, dependencies_fixture, request):
+    expected_dependencies = request.getfixturevalue(dependencies_fixture)
+    proj = here/'sources/projBatch'
+    suffixes = ['.f90', '.F90']
+
+    path_list = [f for ext in suffixes for f in proj.glob(f'**/*{ext}')]
+    assert len(path_list) == 8
+
+    # Map item names to items
+    item_cache = CaseInsensitiveDict()
+
+    # Instantiate the basic list of items (files, modules, subroutines)
+    for path in path_list:
+        relative_path = str(path.relative_to(proj))
+        file_item = get_item(FileItem, path, relative_path, RegexParserClass.ProgramUnitClass)
+        item_cache[relative_path] = file_item
+        item_cache.update((item.name, item) for item in file_item.create_definition_items(item_cache=item_cache))
+
+    # Create the graph
+ sgraph = SGraph(seed, item_cache) + + # Check the graph + assert set(sgraph.items) == set(expected_dependencies) + assert set(sgraph.dependencies) == { + (node, dependency) + for node, dependencies in expected_dependencies.items() + for dependency in dependencies + } + + # Check the graph visualization + graph_file = gettempdir()/'sgraph_from_seed.dot' + sgraph.export_to_file(graph_file) + assert graph_file.exists() + assert graph_file.with_suffix('.dot.pdf').exists() + + vgraph = VisGraphWrapper(graph_file) + assert set(vgraph.nodes) == {item.upper() for item in expected_dependencies} + assert set(vgraph.edges) == { + (node.upper(), dependency.upper()) + for node, dependencies in expected_dependencies.items() + for dependency in dependencies + } + graph_file.unlink() + graph_file.with_suffix('.dot.pdf').unlink() From 25078111130f3b7a5eb076376db6187cedb29b2a Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Mon, 22 May 2023 11:24:20 +0100 Subject: [PATCH 15/23] Refactored item creation --- loki/bulk/item.py | 238 ++++++++++------------ loki/bulk/scheduler.py | 13 +- tests/sources/projBatch/module/tt_mod.F90 | 6 +- tests/test_batch.py | 60 ++++-- 4 files changed, 166 insertions(+), 151 deletions(-) diff --git a/loki/bulk/item.py b/loki/bulk/item.py index 0d4096704..1ca4c29c9 100644 --- a/loki/bulk/item.py +++ b/loki/bulk/item.py @@ -10,10 +10,11 @@ import sys from loki.frontend import REGEX, RegexParserClass -from loki.expression import TypedSymbol, MetaSymbol, ProcedureSymbol, FindVariables +from loki.expression import TypedSymbol, MetaSymbol, ProcedureSymbol from loki.ir import Import, CallStatement, TypeDef, ProcedureDeclaration from loki.logging import warning from loki.module import Module +from loki.scope import Scope from loki.subroutine import Subroutine from loki.tools import as_tuple, flatten, CaseInsensitiveDict from loki.types import DerivedType @@ -154,161 +155,142 @@ def concretize_dependencies(self): ir = ir.parent 
ir.make_complete(frontend=REGEX, parser_classes=self._depends_class) + def _get_procedure_item(self, proc_symbol, item_cache): + # A recursive map of all imports + import_map = CaseInsensitiveDict() + scope = self.ir + if not isinstance(scope, Scope): + scope = scope.scope + current_module = None + while scope: + if hasattr(scope, 'import_map'): + import_map |= scope.import_map + if isinstance(scope, Module): + current_module = scope + scope = scope.parent + + proc_name = proc_symbol.name + if '%' in proc_name: + # This is a typebound procedure call: we are only resolving + # to the type member by mapping the local name to the type name, + # and creating a ProcedureBindingItem. For that we need to find out + # the type of the derived type symbol. + # NB: For nested derived types, we create multiple such ProcedureBindingItems, + # resolving one type at a time, e.g. + # my_var%member%procedure -> my_type%member%procedure -> member_type%procedure -> procedure + type_name = proc_symbol.parents[0].type.dtype.name + # Imported in current or parent scopes? + imprt = import_map.get(type_name) + if imprt: + scope_name = imprt.module + # Otherwise: must be declared in parent module scope + elif current_module and type_name in current_module.typedef_map: + scope_name = current_module.name + # 4. Unknown: Likely imported via `USE` without `ONLY` list + else: + # NB: We could now search the item_cache for entries ending in `#{type_name}`, + # hoping the corresponding TypeDefItem has already been created, which it + # will probably not have been. Therefore, we require the underlying Fortran to + # have fully-qualified imports instead + raise RuntimeError( + f'Unable to find the module declaring {type_name}. Import via `USE` without `ONLY`?' 
+ ) + item_name = f'{scope_name}#{type_name}%{"%".join(proc_symbol.name_parts[1:])}'.lower() + return self._get_or_create_item(ProcedureBindingItem, item_name, item_cache, scope_name) + + if proc_name in import_map: + # This is a call to a module procedure which has been imported via + # a fully qualified import in the current or parent scope + scope_name = import_map.get(proc_name).module + item_name = f'{scope_name}#{proc_name}'.lower() + return self._get_or_create_item(ProcedureItem, item_name, item_cache, scope_name) + + if proc_name in (intf_map := self.ir.interface_symbols): + # TODO: Handle declaration via interface + raise NotImplementedError() + + # This is a call to a subroutine declared via header-included interface + item_name = f'#{proc_name}'.lower() + return item_cache[item_name] + + @staticmethod - def _get_or_create_item(item_cls, item_name, item_cache, source=None, module_name=None): + def _get_or_create_item(item_cls, item_name, item_cache, scope_name): if item_name in item_cache: return item_cache[item_name] - - if not source: - if not module_name: - raise ValueError('Need to provide source or module_name') - if module_name not in item_cache: - raise RuntimeError(f'Module {module_name} not found in item_cache') - source = item_cache[module_name].source - + if scope_name not in item_cache: + raise RuntimeError(f'Module {scope_name} not found in item_cache') + source = item_cache[scope_name].source item = item_cls(item_name, source=source) item_cache[item_name] = item return item - def create_from_ir(self, node, item_cache): + def _create_from_ir(self, node, item_cache, config): if isinstance(node, Module): + # We may create ModuleItem in two situations: + # 1. as a dependency, when it is likely already present in the item_cache, or + # 2. from a FileItem, e.g. 
to instantiate it in the item_cache for the first time + # Therefore, we pass here both, `source` and `module_name` + # For the latter case, we pass current Item's name to perform the lookup via the file item + # entry in the item_cache item_name = node.name.lower() - items = as_tuple(self._get_or_create_item(ModuleItem, item_name, item_cache, source=self.source)) - - elif isinstance(node, Subroutine): - item_name = f'{getattr(node.parent, "name", "")}#{node.name}'.lower() - items = as_tuple(self._get_or_create_item(ProcedureItem, item_name, item_cache, source=self.source)) - - elif isinstance(node, TypeDef): - item_name = f'{node.parent.name}#{node.name}'.lower() - items = as_tuple(self._get_or_create_item(TypeDefItem, item_name, item_cache, source=self.source)) - - elif isinstance(node, Import): + scope_name = item_name if item_name in item_cache else self.name + return as_tuple(self._get_or_create_item(ModuleItem, item_name, item_cache, scope_name)) + + if isinstance(node, Subroutine): + # Like ModuleItem, this may be a dependency or a first-time instantiation + scope_name = getattr(node.parent, 'name', '').lower() + item_name = f'{scope_name}#{node.name}'.lower() + return as_tuple(self._get_or_create_item(ProcedureItem, item_name, item_cache, scope_name or self.name)) + + if isinstance(node, TypeDef): + # A typedef always lives in a Module + scope_name = node.parent.name.lower() + item_name = f'{scope_name}#{node.name}'.lower() + return as_tuple(self._get_or_create_item(TypeDefItem, item_name, item_cache, scope_name)) + + if isinstance(node, Import): # If we have a fully-qualified import (which we hopefully have), # we create a dependency for every imported symbol, otherwise we # depend only on the imported module - module_name = node.module.lower() - if module_name not in item_cache: - raise RuntimeError(f'Module {module_name} not found in item_cache') - module_item = item_cache[module_name] + scope_name = node.module.lower() + if scope_name not in item_cache: 
+ raise RuntimeError(f'Module {scope_name} not found in item_cache') + scope_item = item_cache[scope_name] if node.symbols: - module_definitions = { - item.local_name: item for item in module_item.create_definition_items(item_cache=item_cache) + scope_definitions = { + item.local_name: item for item in scope_item.create_definition_items(item_cache=item_cache) } - items = tuple(module_definitions[str(smbl).lower()] for smbl in node.symbols) - else: - items = as_tuple(module_item) - - elif isinstance(node, CallStatement): - procedure_name = str(node.name) - if '%' in procedure_name: - # This is a typebound procedure call, we are only resolving - # to the type member by mapping the local name to the type name - type_name = node.name.parents[0].type.dtype.name.lower() - # Find the module where the type is defined: - scope = node.name.scope - # 1. Import in current scope - imprt = scope.import_map.get(type_name) - # 2. Import in parent scope - if not imprt and scope.parent: - imprt = scope.parent.import_map.get(type_name) - if imprt: - module_name = imprt.module - # 3. Declared in parent scope - elif scope.parent and type_name in scope.parent.typedef_map: - module_name = scope.parent.name - # 4. 
Unknown - else: - raise RuntimeError(f'Unable to find the module declaring {type_name}') - - item_name = f'{module_name}#{type_name}%{"%".join(node.name.name_parts[1:])}'.lower() - items = as_tuple(self._get_or_create_item( - ProcedureBindingItem, item_name, item_cache, module_name=module_name - )) - elif procedure_name in self.ir.imported_symbols: - # This is a call to a module procedure which has been imported via - # a fully qualified import - module_name = self.ir.import_map.get(procedure_name).module - item_name = f'{module_name}#{procedure_name}'.lower() - items = as_tuple(self._get_or_create_item( - ProcedureItem, item_name, item_cache, module_name=module_name - )) - elif self.ir.parent and procedure_name in self.ir.parent.imported_symbols: - # This is a call to a module procedure which has been imported via - # a fully qualified import in the parent scope - module_name = self.ir.parent.import_map.get(procedure_name).module - item_name = f'{module_name}#{procedure_name}'.lower() - items = as_tuple(self._get_or_create_item( - ProcedureItem, item_name, item_cache, module_name=module_name - )) - - elif procedure_name in (intf_map := self.ir.interface_symbols): - # TODO: Handle declaration via interface - raise NotImplementedError() - else: - # This is a call to a subroutine declared via header-included interface - item_name = f'#{procedure_name}'.lower() - items = as_tuple(item_cache[item_name]) - - elif isinstance(node, ProcedureSymbol): - # This is a procedure binding, presumably to a routine that is - # bound to a derived type that is nested into another derived type - assert '%' in node.name - type_name = node.parents[0].type.dtype.name.lower() - proc_name = '%'.join(node.name_parts[1:]) - - # Find the module where the type is defined: - scope = node.scope - # 1. Import in current scope - if hasattr(scope, 'import_map'): - imprt = scope.import_map.get(type_name) - else: - imprt = None - # 2. 
Import in parent scope - if not imprt and scope.parent: - imprt = scope.parent.import_map.get(type_name) - if imprt: - module_name = imprt.module - # 3. Declared in parent scope - elif scope.parent and type_name in scope.parent.typedef_map: - module_name = scope.parent.name - # 4. Unknown - else: - raise RuntimeError(f'Unable to find the module declaring {type_name}') - - item_name = f'{module_name}#{type_name}%{proc_name}'.lower() - items = as_tuple(self._get_or_create_item( - ProcedureBindingItem, item_name, item_cache, module_name=module_name - )) + return tuple(scope_definitions[str(smbl).lower()] for smbl in node.symbols) + return (scope_item,) - elif isinstance(node, (TypedSymbol, MetaSymbol)): - # This is a global variable - item_name = f'{node.scope.name}#{node.name}'.lower() - items = as_tuple(self._get_or_create_item( - GlobalVariableItem, item_name, item_cache, module_name=node.scope.name - )) + if isinstance(node, CallStatement): + return as_tuple(self._get_procedure_item(node.name, item_cache)) - else: - raise ValueError(f'{node} has an unsupported node type {type(node)}') + if isinstance(node, ProcedureSymbol): + return as_tuple(self._get_procedure_item(node, item_cache)) - # Insert new items into the cache - item_cache.update((item.name, item) for item in items if item.name not in item_cache) + if isinstance(node, (TypedSymbol, MetaSymbol)): + # This is a global variable + scope_name = node.scope.name.lower() + item_name = f'{scope_name}#{node.name}'.lower() + return as_tuple(self._get_or_create_item(GlobalVariableItem, item_name, item_cache, scope_name)) - return items + raise ValueError(f'{node} has an unsupported node type {type(node)}') - def create_definition_items(self, item_cache, only=None): - items = tuple(flatten(self.create_from_ir(node, item_cache) for node in self.definitions)) + def create_definition_items(self, item_cache, config=None, only=None): + items = tuple(flatten(self._create_from_ir(node, item_cache, config) for node in 
self.definitions)) if only: items = tuple(item for item in items if isinstance(item, only)) return items - def create_dependency_items(self, item_cache, only=None): + def create_dependency_items(self, item_cache, config=None, only=None): if not (dependencies := self.dependencies): return () items = () for node in dependencies: - items += self.create_from_ir(node, item_cache) + items += self._create_from_ir(node, item_cache, config) if only: items = tuple(item for item in items if isinstance(item, only)) diff --git a/loki/bulk/scheduler.py b/loki/bulk/scheduler.py index f1e3171eb..72d9b7370 100644 --- a/loki/bulk/scheduler.py +++ b/loki/bulk/scheduler.py @@ -792,11 +792,11 @@ def write_cmake_plan(self, filepath, mode, buildpath, rootpath): class SGraph: - def __init__(self, seed, item_cache): + def __init__(self, seed, item_cache, config=None): self._graph = nx.DiGraph() - self.populate(seed, item_cache) + self.populate(seed, item_cache, config) - def populate(self, seed, item_cache): + def populate(self, seed, item_cache, config): queue = deque() # Insert the seed objects @@ -809,7 +809,7 @@ def populate(self, seed, item_cache): # We may have to create the corresponding module's definitions first module_item = item_cache.get(name[:name.index('#')]) if module_item: - module_item.create_definition_items(item_cache) + module_item.create_definition_items(item_cache=item_cache, config=config) item = item_cache.get(name) if item: @@ -821,7 +821,7 @@ def populate(self, seed, item_cache): # Populate the graph while queue: item = queue.popleft() - dependencies = item.create_dependency_items(item_cache=item_cache) + dependencies = item.create_dependency_items(item_cache=item_cache, config=config) new_items = [item_ for item_ in dependencies if item_ not in self._graph] if new_items: self.add_nodes(new_items) @@ -855,7 +855,8 @@ def export_to_file(self, dotfile_path): Parameters ---------- dotfile_path : str or pathlib.Path - Path to write the callgraph figure to. 
+ Path to write the dotfile to. A corresponding graphical representation + will be created with an additional ``.pdf`` appendix. """ try: import graphviz as gviz # pylint: disable=import-outside-toplevel diff --git a/tests/sources/projBatch/module/tt_mod.F90 b/tests/sources/projBatch/module/tt_mod.F90 index c043a50e6..9ff23aedb 100644 --- a/tests/sources/projBatch/module/tt_mod.F90 +++ b/tests/sources/projBatch/module/tt_mod.F90 @@ -8,10 +8,10 @@ module tt_mod real(kind=k), allocatable :: indirection(:) real(kind=k) :: other(nclv) contains - procedure :: proc => tt_proc + procedure :: proc end type tt contains - subroutine tt_proc(this) + subroutine proc(this) class(tt), intent(inout) :: this - end subroutine tt_proc + end subroutine proc end module tt_mod diff --git a/tests/test_batch.py b/tests/test_batch.py index dc9e965f5..862564ede 100644 --- a/tests/test_batch.py +++ b/tests/test_batch.py @@ -43,8 +43,8 @@ def fixture_comp1_expected_dependencies(): 't_mod#t1%way': ('t_mod#my_way',), 't_mod#my_way': ('t_mod#t1', 't_mod#t1%way'), 'tt_mod#tt': (), - 'tt_mod#tt%proc': ('tt_mod#tt_proc',), - 'tt_mod#tt_proc': ('tt_mod#tt',), + 'tt_mod#tt%proc': ('tt_mod#proc',), + 'tt_mod#proc': ('tt_mod#tt',), 'header_mod': (), 'header_mod#k': (), } @@ -55,8 +55,8 @@ def fixture_mod_proc_expected_dependencies(): return { 'other_mod#mod_proc': ('tt_mod#tt', 'tt_mod#tt%proc', 'b_mod#b'), 'tt_mod#tt': (), - 'tt_mod#tt%proc': ('tt_mod#tt_proc',), - 'tt_mod#tt_proc': ('tt_mod#tt',), + 'tt_mod#tt%proc': ('tt_mod#proc',), + 'tt_mod#proc': ('tt_mod#tt',), 'b_mod#b': () } @@ -122,17 +122,23 @@ def test_file_item1(here): # Querying definitions triggers a round of parsing assert item.definitions == (item.source['a_mod'],) assert len(item.source.definitions) == 1 - items = item.create_definition_items(item_cache={}) + + with pytest.raises(RuntimeError): + # Without the FileItem in the item_cache, we can't create the modules + item.create_definition_items(item_cache={}) + + items = 
item.create_definition_items(item_cache={item.name: item}) assert len(items) == 1 assert items[0] != None # pylint: disable=singleton-comparison # (intentionally trigger __eq__ here) assert items[0].name == 'a_mod' assert items[0].definitions == (item.source['a'],) + # The default behavior would be to have the ProgramUnits parsed already item = get_item(FileItem, proj/'module/a_mod.F90', 'module/a_mod.F90', RegexParserClass.ProgramUnitClass) assert item.name == 'module/a_mod.F90' assert item.definitions == (item.source['a_mod'],) assert item.ir is item.source - items = item.create_definition_items(item_cache={}) + items = item.create_definition_items(item_cache={item.name: item}) assert len(items) == 1 assert items[0].name == 'a_mod' assert items[0].definitions == (item.source['a'],) @@ -146,7 +152,7 @@ def test_file_item2(here): assert item.name == 'module/t_mod.F90' assert item.definitions == (item.source['t_mod'],) - items = item.create_definition_items(item_cache={}) + items = item.create_definition_items(item_cache={item.name: item}) assert len(items) == 1 assert items[0].name == 't_mod' assert items[0].ir is item.source['t_mod'] @@ -175,7 +181,7 @@ def test_file_item3(here): assert item.name == 'module/t_mod.F90' assert item.definitions == (item.source['t_mod'],) - items = item.create_definition_items(item_cache={}) + items = item.create_definition_items(item_cache={item.name: item}) assert len(items) == 1 assert items[0].name == 't_mod' assert len(items[0].ir.typedefs) == 2 @@ -187,8 +193,8 @@ def test_file_item3(here): ) # Filter items when calling create_definition_items() - assert not item.create_definition_items(only=ProcedureItem, item_cache={}) - items = item.create_definition_items(only=ModuleItem, item_cache={}) + assert not item.create_definition_items(only=ProcedureItem, item_cache={item.name: item}) + items = item.create_definition_items(only=ModuleItem, item_cache={item.name: item}) assert len(items) == 1 assert isinstance(items[0], ModuleItem) 
assert items[0].ir == item.source['t_mod'] @@ -206,7 +212,7 @@ def test_module_item1(here): assert item.ir is item.source['a_mod'] assert item.definitions == (item.source['a'],) - items = item.create_definition_items(item_cache={}) + items = item.create_definition_items(item_cache={item.name: item}) assert len(items) == 1 assert isinstance(items[0], ProcedureItem) assert items[0].ir == item.source['a'] @@ -224,7 +230,7 @@ def test_module_item2(here): assert item.ir is item.source['b_mod'] assert item.definitions == (item.source['b'],) - items = item.create_definition_items(item_cache={}) + items = item.create_definition_items(item_cache={item.name: item}) assert len(items) == 1 assert isinstance(items[0], ProcedureItem) assert items[0].ir == item.source['b'] @@ -380,6 +386,29 @@ def test_procedure_item4(here): assert items == ('t_mod#t1', 't_mod#t1%way') +@pytest.mark.skip() +@pytest.mark.parametrize('config,expected_dependencies', [ + ({}, ('t_mod#t', 'header_mod#k', 'a_mod#a', 'b_mod#b', 't_mod#t%yay%proc')), + ({'default': {'disable': ['a']}}, ('t_mod#t', 'header_mod#k', 'b_mod#b', 't_mod#t%yay%proc')), +]) +def test_procedure_item_with_config(here, config, expected_dependencies): + proj = here/'sources/projBatch' + + # A file with a single subroutine definition that calls two routines via module imports + item = get_item(ProcedureItem, proj/'source/comp2.F90', '#comp2', RegexParserClass.ProgramUnitClass) + + # We need to have suitable dependency modules in the cache to spawn the dependency items + item_cache = {item.name: item} + item_cache = { + (i := get_item(ModuleItem, proj/path, name, RegexParserClass.ProgramUnitClass)).name: i + for path, name in [ + ('module/t_mod.F90', 't_mod'), ('module/a_mod.F90', 'a_mod'), + ('module/b_mod.F90', 'b_mod'), ('headers/header_mod.F90', 'header_mod') + ] + } + assert item.create_dependency_items(item_cache=item_cache, config=config) == expected_dependencies + + def test_typedef_item(here): proj = here/'sources/projBatch' 
@@ -401,7 +430,8 @@ def test_typedef_item(here): with pytest.raises(RuntimeError): item.create_dependency_items(item_cache=item_cache) - # Need to add the module of the dependent type + # Need to add the modules of the dependent types + item_cache['t_mod'] = ModuleItem('t_mod', source=item.source) item_cache['tt_mod'] = get_item( ModuleItem, proj/'module/tt_mod.F90', 'tt_mod', RegexParserClass.ProgramUnitClass ) @@ -452,7 +482,9 @@ def test_procedure_binding_item1(here): assert item.definitions is () assert not item.create_definition_items(item_cache={}) assert item.dependencies == as_tuple(item.source['t_proc']) - items = item.create_dependency_items(item_cache={}) + + item_cache = {'t_mod': ModuleItem('t_mod', source=item.source)} + items = item.create_dependency_items(item_cache=item_cache) assert len(items) == 1 assert isinstance(items[0], ProcedureItem) assert items[0].ir is item.source['t_proc'] From 19305f2a6ed2e9d7ca163b614a963880b0dd0c1d Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Mon, 22 May 2023 14:18:32 +0100 Subject: [PATCH 16/23] Enhance the SchedulerConfig with accessors --- loki/bulk/scheduler.py | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/loki/bulk/scheduler.py b/loki/bulk/scheduler.py index 72d9b7370..ef504d0b8 100644 --- a/loki/bulk/scheduler.py +++ b/loki/bulk/scheduler.py @@ -70,7 +70,10 @@ def __init__(self, default, routines, disable=None, dimensions=None, dic2p=None, @classmethod def from_dict(cls, config): - default = config['default'] + """ + Populate :any:`SchedulerConfig` from the given :any:`dict` :data:`config` + """ + default = config.get('default', {}) if 'routine' in config: config['routines'] = OrderedDict((r['name'], r) for r in config.get('routine', [])) else: @@ -98,6 +101,9 @@ def from_dict(cls, config): @classmethod def from_file(cls, path): + """ + Populate :any:`SchedulerConfig` from a toml file at :data:`path` + """ import toml # pylint: 
disable=import-outside-toplevel # Load configuration file and process options with Path(path).open('r') as f: @@ -105,6 +111,39 @@ def from_file(cls, path): return cls.from_dict(config) + def _get_item_keys(self, item_name, property_name): + """ + Helper routine to match a :any:`Item` name, which includes a scope, + to entries in a config property, where names are allowed to appear + without the relevant scope names + """ + item_name = item_name.lower() + item_names = (item_name, item_name[item_name.find('#')+1:]) + return tuple(key for key in getattr(self, property_name) or () if key in item_names) + + def create_item_config(self, name): + """ + Create the bespoke config `dict` for an :any:`Item` + + The resulting config object contains the :attr:`default` + values and any item-specific overwrites and additions. + """ + keys = self._get_item_keys(name, 'routines') + if len(keys) > 1: + if self.default.get('strict'): + raise RuntimeError(f'{name} matches multiple config entries: {", ".join(keys)}') + warning(f'{name} matches multiple config entries: {", ".join(keys)}') + item_conf = self.default.copy() + for key in keys: + item_conf.update(self.routines[key]) + return item_conf + + def is_disabled(self, name): + """ + Check if the item with the given :data:`name` is marked as `disabled` + """ + return len(self._get_item_keys(name, 'disable')) > 0 + class Scheduler: """ From 1f6112a5db4be91c146405d57b545a3fe0cb134d Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Mon, 22 May 2023 14:19:33 +0100 Subject: [PATCH 17/23] Enhance Item creation with config and support for `disable` --- loki/bulk/item.py | 50 ++++++++++++++--------- loki/module.py | 3 +- tests/test_batch.py | 97 +++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 121 insertions(+), 29 deletions(-) diff --git a/loki/bulk/item.py b/loki/bulk/item.py index 1ca4c29c9..6bcf6bbe8 100644 --- a/loki/bulk/item.py +++ b/loki/bulk/item.py @@ -155,7 +155,7 @@ def concretize_dependencies(self): ir = 
ir.parent ir.make_complete(frontend=REGEX, parser_classes=self._depends_class) - def _get_procedure_item(self, proc_symbol, item_cache): + def _get_procedure_item(self, proc_symbol, item_cache, config): # A recursive map of all imports import_map = CaseInsensitiveDict() scope = self.ir @@ -186,7 +186,7 @@ def _get_procedure_item(self, proc_symbol, item_cache): # Otherwise: must be declared in parent module scope elif current_module and type_name in current_module.typedef_map: scope_name = current_module.name - # 4. Unknown: Likely imported via `USE` without `ONLY` list + # Unknown: Likely imported via `USE` without `ONLY` list else: # NB: We could now search the item_cache for entries ending in `#{type_name}`, # hoping the corresponding TypeDefItem has already been created, which it @@ -196,14 +196,14 @@ def _get_procedure_item(self, proc_symbol, item_cache): f'Unable to find the module declaring {type_name}. Import via `USE` without `ONLY`?' ) item_name = f'{scope_name}#{type_name}%{"%".join(proc_symbol.name_parts[1:])}'.lower() - return self._get_or_create_item(ProcedureBindingItem, item_name, item_cache, scope_name) + return self._get_or_create_item(ProcedureBindingItem, item_name, item_cache, scope_name, config) if proc_name in import_map: # This is a call to a module procedure which has been imported via # a fully qualified import in the current or parent scope scope_name = import_map.get(proc_name).module item_name = f'{scope_name}#{proc_name}'.lower() - return self._get_or_create_item(ProcedureItem, item_name, item_cache, scope_name) + return self._get_or_create_item(ProcedureItem, item_name, item_cache, scope_name, config) if proc_name in (intf_map := self.ir.interface_symbols): # TODO: Handle declaration via interface @@ -214,14 +214,20 @@ def _get_procedure_item(self, proc_symbol, item_cache): return item_cache[item_name] - @staticmethod - def _get_or_create_item(item_cls, item_name, item_cache, scope_name): + @classmethod + def _get_or_create_item(cls, 
item_cls, item_name, item_cache, scope_name, config): + if config and config.is_disabled(item_name): + return None if item_name in item_cache: return item_cache[item_name] if scope_name not in item_cache: - raise RuntimeError(f'Module {scope_name} not found in item_cache') + if config and config.default['strict']: + raise RuntimeError(f'Module {scope_name} not found in item_cache') + warning(f'Module {scope_name} not found in item_cache') + return None source = item_cache[scope_name].source - item = item_cls(item_name, source=source) + item_conf = config.create_item_config(item_name) if config else None + item = item_cls(item_name, source=source, config=item_conf) item_cache[item_name] = item return item @@ -235,19 +241,21 @@ def _create_from_ir(self, node, item_cache, config): # entry in the item_cache item_name = node.name.lower() scope_name = item_name if item_name in item_cache else self.name - return as_tuple(self._get_or_create_item(ModuleItem, item_name, item_cache, scope_name)) + return as_tuple(self._get_or_create_item(ModuleItem, item_name, item_cache, scope_name, config)) if isinstance(node, Subroutine): # Like ModuleItem, this may be a dependency or a first-time instantiation scope_name = getattr(node.parent, 'name', '').lower() item_name = f'{scope_name}#{node.name}'.lower() - return as_tuple(self._get_or_create_item(ProcedureItem, item_name, item_cache, scope_name or self.name)) + return as_tuple( + self._get_or_create_item(ProcedureItem, item_name, item_cache, scope_name or self.name, config) + ) if isinstance(node, TypeDef): # A typedef always lives in a Module scope_name = node.parent.name.lower() item_name = f'{scope_name}#{node.name}'.lower() - return as_tuple(self._get_or_create_item(TypeDefItem, item_name, item_cache, scope_name)) + return as_tuple(self._get_or_create_item(TypeDefItem, item_name, item_cache, scope_name, config)) if isinstance(node, Import): # If we have a fully-qualified import (which we hopefully have), @@ -259,27 +267,28 @@ 
def _create_from_ir(self, node, item_cache, config): scope_item = item_cache[scope_name] if node.symbols: scope_definitions = { - item.local_name: item for item in scope_item.create_definition_items(item_cache=item_cache) + item.local_name: item + for item in scope_item.create_definition_items(item_cache=item_cache, config=config) } - return tuple(scope_definitions[str(smbl).lower()] for smbl in node.symbols) + return tuple(it for smbl in node.symbols if (it := scope_definitions.get(str(smbl).lower()))) return (scope_item,) if isinstance(node, CallStatement): - return as_tuple(self._get_procedure_item(node.name, item_cache)) + return as_tuple(self._get_procedure_item(node.name, item_cache, config)) if isinstance(node, ProcedureSymbol): - return as_tuple(self._get_procedure_item(node, item_cache)) + return as_tuple(self._get_procedure_item(node, item_cache, config)) if isinstance(node, (TypedSymbol, MetaSymbol)): # This is a global variable scope_name = node.scope.name.lower() item_name = f'{scope_name}#{node.name}'.lower() - return as_tuple(self._get_or_create_item(GlobalVariableItem, item_name, item_cache, scope_name)) + return as_tuple(self._get_or_create_item(GlobalVariableItem, item_name, item_cache, scope_name, config)) raise ValueError(f'{node} has an unsupported node type {type(node)}') def create_definition_items(self, item_cache, config=None, only=None): - items = tuple(flatten(self._create_from_ir(node, item_cache, config) for node in self.definitions)) + items = as_tuple(flatten(self._create_from_ir(node, item_cache, config) for node in self.definitions)) if only: items = tuple(item for item in items if isinstance(item, only)) return items @@ -290,7 +299,7 @@ def create_dependency_items(self, item_cache, config=None, only=None): items = () for node in dependencies: - items += self._create_from_ir(node, item_cache, config) + items += as_tuple(self._create_from_ir(node, item_cache, config)) if only: items = tuple(item for item in items if isinstance(item, 
only)) @@ -879,13 +888,16 @@ class ProcedureBindingItem(Item): procedures to their implementation in a Fortran routine. """ - _parser_class = RegexParserClass.CallClass + _parser_class = RegexParserClass.TypeDefClass | RegexParserClass.CallClass _depends_class = RegexParserClass.DeclarationClass @property def ir(self): name_parts = self.local_name.split('%') typedef = self.source[name_parts[0]] + if not typedef: + self.scope.make_complete(frontend=REGEX, parser_classes=self._parser_class) + typedef = self.source[name_parts[0]] for decl in typedef.declarations: if name_parts[1] in decl.symbols: return decl.symbols[decl.symbols.index(name_parts[1])] diff --git a/loki/module.py b/loki/module.py index 8c4fe7ea1..96d597bc8 100644 --- a/loki/module.py +++ b/loki/module.py @@ -17,9 +17,8 @@ from loki.program_unit import ProgramUnit from loki.scope import Scope from loki.subroutine import Subroutine -from loki.tools import as_tuple, flatten +from loki.tools import as_tuple from loki.types import ModuleType, SymbolAttributes -from loki.visitors import FindNodes __all__ = ['Module'] diff --git a/tests/test_batch.py b/tests/test_batch.py index 862564ede..b4fdc360a 100644 --- a/tests/test_batch.py +++ b/tests/test_batch.py @@ -15,7 +15,7 @@ from loki import ( HAVE_FP, HAVE_OFP, REGEX, RegexParserClass, as_tuple, CaseInsensitiveDict, gettempdir, FileItem, ModuleItem, ProcedureItem, TypeDefItem, ProcedureBindingItem, GlobalVariableItem, - SGraph, + SGraph, SchedulerConfig, Sourcefile, Section, RawSource, Import, CallStatement, Scalar ) @@ -27,6 +27,23 @@ def fixture_here(): return Path(__file__).parent +@pytest.fixture(name='default_config') +def fixture_default_config(): + """ + Default SchedulerConfig configuration with basic options. 
+ """ + return SchedulerConfig.from_dict({ + 'default': { + 'mode': 'idem', + 'role': 'kernel', + 'expand': True, + 'strict': True, + 'disable': ['abort'] + }, + 'routines': [] + }) + + @pytest.fixture(scope='module', name='comp1_expected_dependencies') def fixture_comp1_expected_dependencies(): return { @@ -96,7 +113,7 @@ def get_item(cls, path, name, parser_classes): return cls(name, source=source) -def test_file_item1(here): +def test_file_item1(here, default_config): proj = here/'sources/projBatch' # A file with simple module that contains a single subroutine @@ -125,7 +142,10 @@ def test_file_item1(here): with pytest.raises(RuntimeError): # Without the FileItem in the item_cache, we can't create the modules - item.create_definition_items(item_cache={}) + item.create_definition_items(item_cache={}, config=default_config) + + # However, without strict parsing it will simply return an empty list + assert not item.create_definition_items(item_cache={}) items = item.create_definition_items(item_cache={item.name: item}) assert len(items) == 1 @@ -386,16 +406,19 @@ def test_procedure_item4(here): assert items == ('t_mod#t1', 't_mod#t1%way') -@pytest.mark.skip() @pytest.mark.parametrize('config,expected_dependencies', [ ({}, ('t_mod#t', 'header_mod#k', 'a_mod#a', 'b_mod#b', 't_mod#t%yay%proc')), ({'default': {'disable': ['a']}}, ('t_mod#t', 'header_mod#k', 'b_mod#b', 't_mod#t%yay%proc')), + ({'default': {'disable': ['a_mod#a']}}, ('t_mod#t', 'header_mod#k', 'b_mod#b', 't_mod#t%yay%proc')), + ({'default': {'disable': ['t_mod#t%yay%proc']}}, ('t_mod#t', 'header_mod#k', 'a_mod#a', 'b_mod#b')), + ({'default': {'disable': ['k']}}, ('t_mod#t', 'a_mod#a', 'b_mod#b', 't_mod#t%yay%proc')), + ({'default': {'disable': ['header_mod#k']}}, ('t_mod#t', 'a_mod#a', 'b_mod#b', 't_mod#t%yay%proc')), ]) def test_procedure_item_with_config(here, config, expected_dependencies): proj = here/'sources/projBatch' # A file with a single subroutine definition that calls two routines via module 
imports - item = get_item(ProcedureItem, proj/'source/comp2.F90', '#comp2', RegexParserClass.ProgramUnitClass) + item = get_item(ProcedureItem, proj/'source/comp2.f90', '#comp2', RegexParserClass.ProgramUnitClass) # We need to have suitable dependency modules in the cache to spawn the dependency items item_cache = {item.name: item} @@ -406,7 +429,8 @@ def test_procedure_item_with_config(here, config, expected_dependencies): ('module/b_mod.F90', 'b_mod'), ('headers/header_mod.F90', 'header_mod') ] } - assert item.create_dependency_items(item_cache=item_cache, config=config) == expected_dependencies + scheduler_config = SchedulerConfig.from_dict(config) + assert item.create_dependency_items(item_cache=item_cache, config=scheduler_config) == expected_dependencies def test_typedef_item(here): @@ -490,7 +514,7 @@ def test_procedure_binding_item1(here): assert items[0].ir is item.source['t_proc'] -def test_procedure_binding_item2(here): +def test_procedure_binding_item2(here, default_config): proj = here/'sources/projBatch' parser_classes = ( RegexParserClass.ProgramUnitClass | RegexParserClass.TypeDefClass | RegexParserClass.DeclarationClass @@ -507,7 +531,7 @@ def test_procedure_binding_item2(here): item_cache = {item.name: item} with pytest.raises(RuntimeError): # Fails because item_cache does not contain the relevant module - item.create_dependency_items(item_cache=item_cache) + item.create_dependency_items(item_cache=item_cache, config=default_config) item_cache['t_mod'] = ModuleItem('t_mod', source=item.source) items = item.create_dependency_items(item_cache=item_cache) @@ -524,6 +548,63 @@ def test_procedure_binding_item2(here): assert 't_mod#my_way' in item_cache +def test_procedure_binding_item3(here): + proj = here/'sources/projBatch' + parser_classes = ( + RegexParserClass.ProgramUnitClass | RegexParserClass.TypeDefClass | RegexParserClass.DeclarationClass + ) + + # 3. 
An indirect procedure binding via a nested type member, where the type is declared in a different module + item = get_item(ProcedureBindingItem, proj/'module/t_mod.F90', 't_mod#t%yay%proc', parser_classes) + assert item.name == 't_mod#t%yay%proc' + assert isinstance(item.ir, Scalar) + assert item.definitions is () + assert not item.create_definition_items(item_cache={}) + assert item.dependencies == ('yay%proc',) + + item_cache = {item.name: item} + item_cache['tt_mod'] = get_item(ModuleItem, proj/'module/tt_mod.F90', 'tt_mod', parser_classes) + items = item.create_dependency_items(item_cache=item_cache) + assert len(items) == 1 + assert isinstance(items[0], ProcedureBindingItem) + assert items[0].name == 'tt_mod#tt%proc' + assert 'tt_mod#tt%proc' in item_cache + + assert 'tt_mod#proc' not in item_cache + next_items = items[0].create_dependency_items(item_cache=item_cache) + assert len(next_items) == 1 + assert isinstance(next_items[0], ProcedureItem) + assert next_items[0].ir is items[0].source['proc'] + assert 'tt_mod#proc' in item_cache + + +@pytest.mark.parametrize('config,expected_dependencies', [ + ({}, (('tt_mod#tt%proc',), ('tt_mod#proc',))), + ({'default': {'disable': ['tt_mod#proc']}}, (('tt_mod#tt%proc',), ())), + ({'default': {'disable': ['proc']}}, (('tt_mod#tt%proc',), ())), + ({'default': {'disable': ['tt%proc']}}, ((),)), + ({'default': {'disable': ['tt_mod#tt%proc']}}, ((),)), +]) +def test_procedure_binding_with_config(here, config, expected_dependencies): + proj = here/'sources/projBatch' + parser_classes = ( + RegexParserClass.ProgramUnitClass | RegexParserClass.TypeDefClass | RegexParserClass.DeclarationClass + ) + + item = get_item(ProcedureBindingItem, proj/'module/t_mod.F90', 't_mod#t%yay%proc', parser_classes) + + # We need to have suitable dependency modules in the cache to spawn the dependency items + item_cache = {item.name: item} + item_cache['tt_mod'] = get_item(ModuleItem, proj/'module/tt_mod.F90', 'tt_mod', 
RegexParserClass.ProgramUnitClass) + scheduler_config = SchedulerConfig.from_dict(config) + + for dependencies in expected_dependencies: + items = item.create_dependency_items(item_cache, config=scheduler_config) + assert items == dependencies + if items: + item = items[0] + + def test_item_graph(here, comp1_expected_dependencies): """ Build a :any:`nx.Digraph` from a dummy call hierarchy to check the incremental parsing and From 7d187b0449a412f77f67815aa1baf587f3307b5b Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Mon, 22 May 2023 23:22:35 +0100 Subject: [PATCH 18/23] Expand disable testing --- loki/bulk/item.py | 16 +++++--- tests/test_batch.py | 96 +++++++++++++++++++++++++++++++++++++++------ 2 files changed, 96 insertions(+), 16 deletions(-) diff --git a/loki/bulk/item.py b/loki/bulk/item.py index 6bcf6bbe8..7d8b49aff 100644 --- a/loki/bulk/item.py +++ b/loki/bulk/item.py @@ -149,11 +149,15 @@ def concretize_definitions(self): self.ir.make_complete(frontend=REGEX, parser_classes=parser_classes) def concretize_dependencies(self): - if self._depends_class and hasattr(self.ir, 'make_complete'): - ir = self.ir - while ir.parent: - ir = ir.parent - ir.make_complete(frontend=REGEX, parser_classes=self._depends_class) + if not self._depends_class: + return + scope = self.ir + if not isinstance(scope, Scope): + scope = scope.scope + while scope.parent: + scope = scope.parent + if hasattr(scope, 'make_complete'): + scope.make_complete(frontend=REGEX, parser_classes=self._depends_class) def _get_procedure_item(self, proc_symbol, item_cache, config): # A recursive map of all imports @@ -211,6 +215,8 @@ def _get_procedure_item(self, proc_symbol, item_cache, config): # This is a call to a subroutine declared via header-included interface item_name = f'#{proc_name}'.lower() + if config.is_disabled(item_name): + return None return item_cache[item_name] diff --git a/tests/test_batch.py b/tests/test_batch.py index b4fdc360a..15f38b01b 100644 --- a/tests/test_batch.py 
+++ b/tests/test_batch.py @@ -32,7 +32,7 @@ def fixture_default_config(): """ Default SchedulerConfig configuration with basic options. """ - return SchedulerConfig.from_dict({ + return { 'default': { 'mode': 'idem', 'role': 'kernel', @@ -41,10 +41,10 @@ def fixture_default_config(): 'disable': ['abort'] }, 'routines': [] - }) + } -@pytest.fixture(scope='module', name='comp1_expected_dependencies') +@pytest.fixture(name='comp1_expected_dependencies') def fixture_comp1_expected_dependencies(): return { '#comp1': ('header_mod', 't_mod#t', '#comp2', 't_mod#t%proc', 't_mod#t%no%way'), @@ -67,7 +67,7 @@ def fixture_comp1_expected_dependencies(): } -@pytest.fixture(scope='module', name='mod_proc_expected_dependencies') +@pytest.fixture(name='mod_proc_expected_dependencies') def fixture_mod_proc_expected_dependencies(): return { 'other_mod#mod_proc': ('tt_mod#tt', 'tt_mod#tt%proc', 'b_mod#b'), @@ -78,12 +78,12 @@ def fixture_mod_proc_expected_dependencies(): } -@pytest.fixture(scope='module', name='expected_dependencies') +@pytest.fixture(name='expected_dependencies') def fixture_expected_dependencies(comp1_expected_dependencies, mod_proc_expected_dependencies): return comp1_expected_dependencies | mod_proc_expected_dependencies -@pytest.fixture(scope='module', name='no_expected_dependencies') +@pytest.fixture(name='no_expected_dependencies') def fixture_no_expected_dependencies(): return {} @@ -142,7 +142,7 @@ def test_file_item1(here, default_config): with pytest.raises(RuntimeError): # Without the FileItem in the item_cache, we can't create the modules - item.create_definition_items(item_cache={}, config=default_config) + item.create_definition_items(item_cache={}, config=SchedulerConfig.from_dict(default_config)) # However, without strict parsing it will simply return an empty list assert not item.create_definition_items(item_cache={}) @@ -422,7 +422,7 @@ def test_procedure_item_with_config(here, config, expected_dependencies): # We need to have suitable dependency 
modules in the cache to spawn the dependency items item_cache = {item.name: item} - item_cache = { + item_cache |= { (i := get_item(ModuleItem, proj/path, name, RegexParserClass.ProgramUnitClass)).name: i for path, name in [ ('module/t_mod.F90', 't_mod'), ('module/a_mod.F90', 'a_mod'), @@ -433,6 +433,24 @@ def test_procedure_item_with_config(here, config, expected_dependencies): assert item.create_dependency_items(item_cache=item_cache, config=scheduler_config) == expected_dependencies +@pytest.mark.parametrize('disable', ['#comp2', 'comp2']) +def test_procedure_item_with_config2(here, disable): + proj = here/'sources/projBatch' + + # Similar to the previous test but checking disabling of subroutines without scope + item = get_item(ProcedureItem, proj/'source/comp1.f90', '#comp1', RegexParserClass.ProgramUnitClass) + + item_cache = {item.name: item} + item_cache['t_mod'] = get_item(ModuleItem, proj/'module/t_mod.F90', 't_mod', RegexParserClass.ProgramUnitClass) + item_cache['header_mod'] = get_item( + ModuleItem, proj/'headers/header_mod.F90', 'header_mod', RegexParserClass.ProgramUnitClass + ) + scheduler_config = SchedulerConfig.from_dict({'default': {'disable': [disable]}}) + assert item.create_dependency_items(item_cache=item_cache, config=scheduler_config) == ( + 't_mod#t', 'header_mod', 't_mod#t%proc', 't_mod#t%no%way' + ) + + def test_typedef_item(here): proj = here/'sources/projBatch' @@ -531,7 +549,7 @@ def test_procedure_binding_item2(here, default_config): item_cache = {item.name: item} with pytest.raises(RuntimeError): # Fails because item_cache does not contain the relevant module - item.create_dependency_items(item_cache=item_cache, config=default_config) + item.create_dependency_items(item_cache=item_cache, config=SchedulerConfig.from_dict(default_config)) item_cache['t_mod'] = ModuleItem('t_mod', source=item.source) items = item.create_dependency_items(item_cache=item_cache) @@ -658,13 +676,14 @@ def test_item_graph(here, 
comp1_expected_dependencies): # plt.savefig('test_item_graph.png') +@pytest.mark.parametrize('with_default_config', [False, True]) @pytest.mark.parametrize('seed,dependencies_fixture', [ ('#comp1', 'comp1_expected_dependencies'), ('other_mod#mod_proc', 'mod_proc_expected_dependencies'), (['#comp1', 'other_mod#mod_proc'], 'expected_dependencies'), ('foobar', 'no_expected_dependencies') ]) -def test_sgraph_from_seed(here, seed, dependencies_fixture, request): +def test_sgraph_from_seed(here, with_default_config, default_config, seed, dependencies_fixture, request): expected_dependencies = request.getfixturevalue(dependencies_fixture) proj = here/'sources/projBatch' suffixes = ['.f90', '.F90'] @@ -683,7 +702,10 @@ def test_sgraph_from_seed(here, seed, dependencies_fixture, request): item_cache.update((item.name, item) for item in file_item.create_definition_items(item_cache=item_cache)) # Create the graph - sgraph = SGraph(seed, item_cache) + if with_default_config: + sgraph = SGraph(seed, item_cache, SchedulerConfig.from_dict(default_config)) + else: + sgraph = SGraph(seed, item_cache) # Check the graph assert set(sgraph.items) == set(expected_dependencies) @@ -708,3 +730,55 @@ def test_sgraph_from_seed(here, seed, dependencies_fixture, request): } graph_file.unlink() graph_file.with_suffix('.dot.pdf').unlink() + + +@pytest.mark.parametrize('seed, disable,active_nodes', [ + ('#comp1', ('comp2', 'a'), ( + '#comp1', 't_mod#t', 'header_mod', 't_mod#t%proc', 't_mod#t%no%way', + 't_mod#t_proc', 't_mod#t%yay%proc', 'tt_mod#tt%proc', 'tt_mod#proc', + 't_mod#t1%way', 't_mod#my_way', 'tt_mod#tt', 't_mod#t1' + )), + ('#comp1', ('comp2', 'a', 't_mod#t%no%way'), ( + '#comp1', 't_mod#t', 'header_mod', 't_mod#t%proc', + 't_mod#t_proc', 't_mod#t%yay%proc', 'tt_mod#tt%proc', 'tt_mod#proc', + 'tt_mod#tt', 't_mod#t1' + )), + ('#comp1', ('#comp2', 't1%way'), ( + '#comp1', 't_mod#t', 'header_mod', 't_mod#t%proc', 't_mod#t%no%way', + 't_mod#t_proc', 't_mod#t%yay%proc', 'tt_mod#tt%proc', 
'tt_mod#proc', + 'tt_mod#tt', 't_mod#t1', 'a_mod#a', 'header_mod#k' + )), + ('t_mod#t_proc', ('t_mod#t1', 'proc'), ( + 't_mod#t_proc', 't_mod#t', 'tt_mod#tt', 'a_mod#a', 'header_mod#k', + 't_mod#t%yay%proc', 'tt_mod#tt%proc' + )) +]) +def test_sgraph_disable(here, default_config, expected_dependencies, seed, disable, active_nodes): + proj = here/'sources/projBatch' + suffixes = ['.f90', '.F90'] + + path_list = [f for ext in suffixes for f in proj.glob(f'**/*{ext}')] + assert len(path_list) == 8 + + # Map item names to items + item_cache = CaseInsensitiveDict() + + # Instantiate the basic list of items (files, modules, subroutines) + for path in path_list: + relative_path = str(path.relative_to(proj)) + file_item = get_item(FileItem, path, relative_path, RegexParserClass.ProgramUnitClass) + item_cache[relative_path] = file_item + item_cache.update((item.name, item) for item in file_item.create_definition_items(item_cache=item_cache)) + + # Create the graph + default_config['default']['disable'] = disable + sgraph = SGraph(seed, item_cache, SchedulerConfig.from_dict(default_config)) + + # Check the graph + assert set(sgraph.items) == set(active_nodes) + assert set(sgraph.dependencies) == { + (node, dependency) + for node, dependencies in expected_dependencies.items() + for dependency in dependencies + if node in active_nodes and dependency in active_nodes + } From 4b63d8a7e960f2667b7bd2fcfaee80a62d44cd46 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Mon, 22 May 2023 23:23:06 +0100 Subject: [PATCH 19/23] Some documentation on new items --- loki/bulk/item.py | 129 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 128 insertions(+), 1 deletion(-) diff --git a/loki/bulk/item.py b/loki/bulk/item.py index 7d8b49aff..c7d07bdf4 100644 --- a/loki/bulk/item.py +++ b/loki/bulk/item.py @@ -123,32 +123,75 @@ def __hash__(self): @property def definitions(self): + """ + Return a tuple of the IR nodes this item defines + + By default, this returns an empty tuple 
and is overwritten by + derived classes. + """ return () @property def dependencies(self): + """ + Return a tuple of IR nodes that constitute dependencies for this item + + This calls :meth:`concretize_dependencies` to trigger a further parse + with the :any:`REGEX` frontend, including the :attr:`_depends_class` of + the item. The list of actual dependencies is defined via :meth:`_dependencies`, + which is overwritten by derived classes. + """ self.concretize_dependencies() return self._dependencies @property def _dependencies(self): + """ + Return a tuple of the IR nodes that constitute dependencies for this item + + This method is used by :attr:`dependencies` to determine the actual + dependencies after calling :meth:`concretize_dependencies`. + + By default, this returns an empty tuple and is overwritten by + derived classes. + """ return () @property def ir(self): + """ + Return the IR :any:`Node` that the item represents + """ return self.source[self.local_name] def _parser_classes_from_item_type_names(self, item_type_names): + """ + Helper method that queries the :attr:`Item._parser_class` of all + :any:`Item` subclasses listed in :data:`item_type_names` + """ item_types = [getattr(sys.modules[__name__], name) for name in item_type_names] parser_classes = [p for item_type in item_types if (p := item_type._parser_class) is not None] return reduce(lambda x, y: x | y, parser_classes, RegexParserClass.EmptyClass) def concretize_definitions(self): + """ + Trigger a re-parse of the source file corresponding to the current item's scope + + This uses :meth:`_parser_classes_from_item_type_names` to determine all + :any:`RegexParserClass` that the item's definitions require to be parsed. + An item's definition classes are listed in :attr:`_defines_items`. 
+ """ parser_classes = self._parser_classes_from_item_type_names(self._defines_items) if parser_classes and hasattr(self.ir, 'make_complete'): self.ir.make_complete(frontend=REGEX, parser_classes=parser_classes) def concretize_dependencies(self): + """ + Trigger a re-parse of the source file corresponding to the current item's scope + + This uses :attr:`_depends_class` to determine all :any:`RegexParserClass` that + the are require to be parsed to find the item's dependencies. + """ if not self._depends_class: return scope = self.ir @@ -219,9 +262,44 @@ def _get_procedure_item(self, proc_symbol, item_cache, config): return None return item_cache[item_name] - @classmethod def _get_or_create_item(cls, item_cls, item_name, item_cache, scope_name, config): + """ + Helper method to instantiate the :any:`Item` with name :data:`item_name` + of class :data:`item_cls` + + This helper method checks for the presence of :data:`item_name` in the + :data:`item_cache` and returns that instance. If none is found, an instance + of :data:`item_cls` is created and stored in the item cache. + + The :data:`scope_name` denotes the name of the parent scope, under which a + parent :any:`Item` has to exist in :data:`item_cache` to find the source + item to use. + + Item names matching one of the entries in the :data:`config` disable list + are skipped. If `strict` mode is enabled, this raises a :any:`RuntimeError` + if no matching parent item can be found in the item cache. 
+ + Parameters + ---------- + item_cls : subclass of :any:`Item` + The class of the item to create + item_name : str + The name of the item to create + item_cache : dict + The cache of existing :any:`Item` objects, mapping the item's names to + item objects + scope_name : str + The name under which a parent item can be found in the :data:`item_cache` + config : :any:`SchedulerConfig` + The config object to use to determine disabled items, and to use when + instantiating the new item + + Returns + ------- + :any:`Item` or None + The item object or `None` if disabled or impossible to create + """ if config and config.is_disabled(item_name): return None if item_name in item_cache: @@ -238,6 +316,17 @@ def _get_or_create_item(cls, item_cls, item_name, item_cache, scope_name, config return item def _create_from_ir(self, node, item_cache, config): + """ + Helper method to create items for definitions or dependency + + This is a helper method to determine the fully-qualified item names + and item type for a given IR :any:`Node`, e.g., when creating the items + for definitions (see :meth:`create_definition_items`) or dependencies + (see :meth:`create_dependency_items`). + + This routine's responsibility is to determine the item name, and then call + :meth:`_get_or_create_item` to look-up an existing items or create it. + """ if isinstance(node, Module): # We may create ModuleItem in two situations: # 1. 
as a dependency, when it is likely already present in the item_cache, or @@ -294,12 +383,50 @@ def _create_from_ir(self, node, item_cache, config): raise ValueError(f'{node} has an unsupported node type {type(node)}') def create_definition_items(self, item_cache, config=None, only=None): + """ + Create the :any:`Item` nodes corresponding to the definitions in the + current item + + Parameters + ---------- + item_cache : dict + The cache of existing :any:`Item` objects, mapping item names to + item objects + config : :any:`SchedulerConfig`, optional + The scheduler config to use when instantiating new items + only : list of :any:`Item` classes + Filter the generated items to include only those provided in the list + + Returns + ------- + tuple + The list of :any:`Item` nodes + """ items = as_tuple(flatten(self._create_from_ir(node, item_cache, config) for node in self.definitions)) if only: items = tuple(item for item in items if isinstance(item, only)) return items def create_dependency_items(self, item_cache, config=None, only=None): + """ + Create the :any:`Item` nodes corresponding to the dependencies of the + current item + + Parameters + ---------- + item_cache : dict + The cache of existing :any:`Item` objects, mapping item names to + item objects + config : :any:`SchedulerConfig`, optional + The scheduler config to use when instantiating new items + only : list of :any:`Item` classes + Filter the generated items to include only those provided in the list + + Returns + ------- + tuple + The list of :any:`Item` nodes + """ if not (dependencies := self.dependencies): return () From 92cff45201b3f822ccdce0523f213ef3ba2b08ee Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Tue, 23 May 2023 00:37:28 +0100 Subject: [PATCH 20/23] Support for ignore and block in SGraph --- loki/bulk/item.py | 2 +- loki/bulk/scheduler.py | 27 +++++++---- tests/test_batch.py | 108 +++++++++++++++++++++++++++++++++++------ 3 files changed, 112 insertions(+), 25 deletions(-) diff 
--git a/loki/bulk/item.py b/loki/bulk/item.py index c7d07bdf4..c02cc0527 100644 --- a/loki/bulk/item.py +++ b/loki/bulk/item.py @@ -258,7 +258,7 @@ def _get_procedure_item(self, proc_symbol, item_cache, config): # This is a call to a subroutine declared via header-included interface item_name = f'#{proc_name}'.lower() - if config.is_disabled(item_name): + if config and config.is_disabled(item_name): return None return item_cache[item_name] diff --git a/loki/bulk/scheduler.py b/loki/bulk/scheduler.py index ef504d0b8..3e368e04f 100644 --- a/loki/bulk/scheduler.py +++ b/loki/bulk/scheduler.py @@ -111,7 +111,8 @@ def from_file(cls, path): return cls.from_dict(config) - def _get_item_keys(self, item_name, property_name): + @staticmethod + def match_item_keys(item_name, keys): """ Helper routine to match a :any:`Item` name, which includes a scope, to entries in a config property, where names are allowed to appear @@ -119,7 +120,7 @@ def _get_item_keys(self, item_name, property_name): """ item_name = item_name.lower() item_names = (item_name, item_name[item_name.find('#')+1:]) - return tuple(key for key in getattr(self, property_name) or () if key in item_names) + return tuple(key for key in keys or () if key in item_names) def create_item_config(self, name): """ @@ -128,7 +129,7 @@ def create_item_config(self, name): The resulting config object contains the :attr:`default` values and any item-specific overwrites and additions. 
""" - keys = self._get_item_keys(name, 'routines') + keys = self.match_item_keys(name, self.routines) if len(keys) > 1: if self.default.get('strict'): raise RuntimeError(f'{name} matches multiple config entries: {", ".join(keys)}') @@ -142,7 +143,7 @@ def is_disabled(self, name): """ Check if the item with the given :data:`name` is marked as `disabled` """ - return len(self._get_item_keys(name, 'disable')) > 0 + return len(self.match_item_keys(name, self.disable)) > 0 class Scheduler: @@ -860,12 +861,18 @@ def populate(self, seed, item_cache, config): # Populate the graph while queue: item = queue.popleft() - dependencies = item.create_dependency_items(item_cache=item_cache, config=config) - new_items = [item_ for item_ in dependencies if item_ not in self._graph] - if new_items: - self.add_nodes(new_items) - queue.extend(new_items) - self.add_edges((item, item_) for item_ in dependencies) + + if item.expand: + dependencies = [] + items_to_ignore = [*item.block, *item.ignore] + for dependency in item.create_dependency_items(item_cache=item_cache, config=config): + if not SchedulerConfig.match_item_keys(dependency.name, items_to_ignore): + dependencies += [dependency] + new_items = [item_ for item_ in dependencies if item_ not in self._graph] + if new_items: + self.add_nodes(new_items) + queue.extend(new_items) + self.add_edges((item, item_) for item_ in dependencies) @property def items(self): diff --git a/tests/test_batch.py b/tests/test_batch.py index 15f38b01b..25ca0030d 100644 --- a/tests/test_batch.py +++ b/tests/test_batch.py @@ -108,9 +108,13 @@ def edges(self): return list(self._re_edges.findall(self.text)) -def get_item(cls, path, name, parser_classes): +def get_item(cls, path, name, parser_classes, scheduler_config=None): source = Sourcefile.from_file(path, frontend=REGEX, parser_classes=parser_classes) - return cls(name, source=source) + if scheduler_config: + config = scheduler_config.create_item_config(name) + else: + config = None + return cls(name, 
source=source, config=config) def test_file_item1(here, default_config): @@ -676,14 +680,13 @@ def test_item_graph(here, comp1_expected_dependencies): # plt.savefig('test_item_graph.png') -@pytest.mark.parametrize('with_default_config', [False, True]) @pytest.mark.parametrize('seed,dependencies_fixture', [ ('#comp1', 'comp1_expected_dependencies'), ('other_mod#mod_proc', 'mod_proc_expected_dependencies'), (['#comp1', 'other_mod#mod_proc'], 'expected_dependencies'), ('foobar', 'no_expected_dependencies') ]) -def test_sgraph_from_seed(here, with_default_config, default_config, seed, dependencies_fixture, request): +def test_sgraph_from_seed(here, default_config, seed, dependencies_fixture, request): expected_dependencies = request.getfixturevalue(dependencies_fixture) proj = here/'sources/projBatch' suffixes = ['.f90', '.F90'] @@ -691,21 +694,26 @@ def test_sgraph_from_seed(here, with_default_config, default_config, seed, depen path_list = [f for ext in suffixes for f in proj.glob(f'**/*{ext}')] assert len(path_list) == 8 + scheduler_config = SchedulerConfig.from_dict(default_config) + # Map item names to items item_cache = CaseInsensitiveDict() # Instantiate the basic list of items (files, modules, subroutines) for path in path_list: relative_path = str(path.relative_to(proj)) - file_item = get_item(FileItem, path, relative_path, RegexParserClass.ProgramUnitClass) + file_item = get_item( + FileItem, path, relative_path, RegexParserClass.ProgramUnitClass, + scheduler_config + ) item_cache[relative_path] = file_item - item_cache.update((item.name, item) for item in file_item.create_definition_items(item_cache=item_cache)) + item_cache.update( + (item.name, item) + for item in file_item.create_definition_items(item_cache=item_cache, config=scheduler_config) + ) # Create the graph - if with_default_config: - sgraph = SGraph(seed, item_cache, SchedulerConfig.from_dict(default_config)) - else: - sgraph = SGraph(seed, item_cache) + sgraph = SGraph(seed, item_cache, 
scheduler_config) # Check the graph assert set(sgraph.items) == set(expected_dependencies) @@ -760,19 +768,91 @@ def test_sgraph_disable(here, default_config, expected_dependencies, seed, disab path_list = [f for ext in suffixes for f in proj.glob(f'**/*{ext}')] assert len(path_list) == 8 + default_config['default']['disable'] = disable + scheduler_config = SchedulerConfig.from_dict(default_config) + # Map item names to items item_cache = CaseInsensitiveDict() # Instantiate the basic list of items (files, modules, subroutines) for path in path_list: relative_path = str(path.relative_to(proj)) - file_item = get_item(FileItem, path, relative_path, RegexParserClass.ProgramUnitClass) + file_item = get_item( + FileItem, path, relative_path, RegexParserClass.ProgramUnitClass, + scheduler_config + ) item_cache[relative_path] = file_item - item_cache.update((item.name, item) for item in file_item.create_definition_items(item_cache=item_cache)) + item_cache.update( + (item.name, item) + for item in file_item.create_definition_items(item_cache=item_cache, config=scheduler_config) + ) # Create the graph - default_config['default']['disable'] = disable - sgraph = SGraph(seed, item_cache, SchedulerConfig.from_dict(default_config)) + sgraph = SGraph(seed, item_cache, scheduler_config) + + # Check the graph + assert set(sgraph.items) == set(active_nodes) + assert set(sgraph.dependencies) == { + (node, dependency) + for node, dependencies in expected_dependencies.items() + for dependency in dependencies + if node in active_nodes and dependency in active_nodes + } + + +@pytest.mark.parametrize('seed,routines,active_nodes', [ + ( + '#comp1', [ + {'name': '#comp1', 'expand': False} + ], ( + '#comp1', + ) + ), + ( + '#comp2', [ + {'name': '#comp2', 'block': ['a', 'b']} + ], ( + '#comp2', 't_mod#t', 'header_mod#k', 't_mod#t%yay%proc', 'tt_mod#tt', + 't_mod#t1', 'tt_mod#tt%proc', 'tt_mod#proc' + ) + ), + ( + '#comp2', [ + {'name': '#comp2', 'ignore': ['a'], 'block': ['b']} + ], ( + 
'#comp2', 't_mod#t', 'header_mod#k', 't_mod#t%yay%proc', 'tt_mod#tt', + 't_mod#t1', 'tt_mod#tt%proc', 'tt_mod#proc' + ) + ), +]) +def test_sgraph_routines(here, default_config, expected_dependencies, seed, routines, active_nodes): + proj = here/'sources/projBatch' + suffixes = ['.f90', '.F90'] + + path_list = [f for ext in suffixes for f in proj.glob(f'**/*{ext}')] + assert len(path_list) == 8 + + default_config['routine'] = routines + scheduler_config = SchedulerConfig.from_dict(default_config) + + # Map item names to items + item_cache = CaseInsensitiveDict() + + # Instantiate the basic list of items (files, modules, subroutines) + for path in path_list: + relative_path = str(path.relative_to(proj)) + file_item = get_item( + FileItem, path, relative_path, RegexParserClass.ProgramUnitClass, + scheduler_config + ) + item_cache[relative_path] = file_item + item_cache.update( + (item.name, item) + for item in file_item.create_definition_items(item_cache=item_cache, config=scheduler_config) + ) + + # Create the graph + sgraph = SGraph(seed, item_cache, scheduler_config) # Check the graph assert set(sgraph.items) == set(active_nodes) From 7f598319144494cbbe6d6e573b5183cc116f7148 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Tue, 23 May 2023 09:08:05 +0100 Subject: [PATCH 21/23] Fix typos and Linter warnings --- loki/bulk/item.py | 114 ++++++++++++++++++++++++++++++++++++++++- loki/frontend/regex.py | 3 +- tests/test_batch.py | 8 +-- 3 files changed, 118 insertions(+), 7 deletions(-) diff --git a/loki/bulk/item.py b/loki/bulk/item.py index c02cc0527..bd2ab76ef 100644 --- a/loki/bulk/item.py +++ b/loki/bulk/item.py @@ -211,7 +211,7 @@ def _get_procedure_item(self, proc_symbol, item_cache, config): current_module = None while scope: if hasattr(scope, 'import_map'): - import_map |= scope.import_map + import_map.update(scope.import_map) if isinstance(scope, Module): current_module = scope scope = scope.parent @@ -252,7 +252,7 @@ def _get_procedure_item(self, 
proc_symbol, item_cache, config): item_name = f'{scope_name}#{proc_name}'.lower() return self._get_or_create_item(ProcedureItem, item_name, item_cache, scope_name, config) - if proc_name in (intf_map := self.ir.interface_symbols): + if proc_name in self.ir.interface_symbols: # TODO: Handle declaration via interface raise NotImplementedError() @@ -817,6 +817,28 @@ def definitions(self): def ir(self): return self.source + # Below properties are only here to appease the Linter and become + # redundant once the Item base class has been cleaned up + @property + def calls(self): + pass + + @property + def function_interfaces(self): + pass + + @property + def imports(self): + pass + + @property + def members(self): + pass + + @property + def routine(self): + pass + class ModuleItem(Item): @@ -837,6 +859,28 @@ def _dependencies(self): def local_name(self): return self.name + # Below properties are only here to appease the Linter and become + # redundant once the Item base class has been cleaned up + @property + def calls(self): + pass + + @property + def function_interfaces(self): + pass + + @property + def imports(self): + pass + + @property + def members(self): + pass + + @property + def routine(self): + pass + class ProcedureItem(Item): @@ -866,6 +910,28 @@ def _dependencies(self): imports += tuple(imprt for type_name in type_names if (imprt := import_map.get(type_name))) return imports + typedefs + calls + # Below properties are only here to appease the Linter and become + # redundant once the Item base class has been cleaned up + @property + def calls(self): + pass + + @property + def function_interfaces(self): + pass + + @property + def imports(self): + pass + + @property + def members(self): + pass + + @property + def routine(self): + pass + class TypeDefItem(Item): @@ -890,11 +956,55 @@ def _dependencies(self): return tuple(dict.fromkeys(imports + typedefs)) + # Below properties are only here to appease the Linter and become + # redundant once the Item base class 
has been cleaned up + @property + def calls(self): + pass + + @property + def function_interfaces(self): + pass + + @property + def imports(self): + pass + + @property + def members(self): + pass + + @property + def routine(self): + pass + class InterfaceItem(Item): _parser_class = RegexParserClass.InterfaceClass + # Below properties are only here to appease the Linter and become + # redundant once the Item base class has been cleaned up + @property + def calls(self): + pass + + @property + def function_interfaces(self): + pass + + @property + def imports(self): + pass + + @property + def members(self): + pass + + @property + def routine(self): + pass + class GlobalVariableItem(Item): diff --git a/loki/frontend/regex.py b/loki/frontend/regex.py index 1118d1c17..8332bbf1e 100644 --- a/loki/frontend/regex.py +++ b/loki/frontend/regex.py @@ -900,7 +900,8 @@ class VariableDeclarationPattern(Pattern): def __init__(self): super().__init__( r'^(((?:type|class)[ \t]*\([ \t]*(?P\w+)[ \t]*\))|' # TYPE or CLASS keyword with typename - r'^([ \t]*(?P(logical|real|integer|complex|character))(?P\((kind|len)=[a-z0-9_-]+\))?[ \t]*))' + r'^([ \t]*(?P(logical|real|integer|complex|character))' + r'(?P\((kind|len)=[a-z0-9_-]+\))?[ \t]*))' r'(?:[ \t]*,[ \t]*[a-z]+(?:\((.(\(.*\))?)*?\))?)*' # Optional attributes r'(?:[ \t]*::)?' 
# Optional `::` delimiter r'[ \t]*' # Some white space diff --git a/tests/test_batch.py b/tests/test_batch.py index 25ca0030d..8730b216e 100644 --- a/tests/test_batch.py +++ b/tests/test_batch.py @@ -305,7 +305,7 @@ def test_procedure_item1(here): item_cache = { (i := get_item(ModuleItem, proj/path, name, RegexParserClass.ProgramUnitClass)).name: i for path, name in [ - ('module/t_mod.F90', 't_mod'), ('source/comp2.F90', '#comp2'), ('headers/header_mod.F90', 'header_mod') + ('module/t_mod.F90', 't_mod'), ('source/comp2.f90', '#comp2'), ('headers/header_mod.F90', 'header_mod') ] } @@ -327,7 +327,7 @@ def test_procedure_item2(here): proj = here/'sources/projBatch' # A file with a single subroutine definition that calls two routines via module imports - item = get_item(ProcedureItem, proj/'source/comp2.F90', '#comp2', RegexParserClass.ProgramUnitClass) + item = get_item(ProcedureItem, proj/'source/comp2.f90', '#comp2', RegexParserClass.ProgramUnitClass) assert item.name == '#comp2' assert item.ir is item.source['comp2'] assert item.definitions is () @@ -442,7 +442,7 @@ def test_procedure_item_with_config2(here, disable): proj = here/'sources/projBatch' # Similar to the previous test but checking disabling of subroutines without scope - item = get_item(ProcedureItem, proj/'source/comp1.f90', '#comp1', RegexParserClass.ProgramUnitClass) + item = get_item(ProcedureItem, proj/'source/comp1.F90', '#comp1', RegexParserClass.ProgramUnitClass) item_cache = {item.name: item} item_cache['t_mod'] = get_item(ModuleItem, proj/'module/t_mod.F90', 't_mod', RegexParserClass.ProgramUnitClass) @@ -491,7 +491,7 @@ def test_typedef_item(here): assert not items[0].dependencies -def test_interface_item(here): +def test_interface_item(): pass From 691264103073fe0bc70bb53fe5ffc7b522cc71a0 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Tue, 23 May 2023 11:42:00 +0100 Subject: [PATCH 22/23] Resilience against items that cannot be found in non-strict discovery --- loki/bulk/item.py | 18 
++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/loki/bulk/item.py b/loki/bulk/item.py index bd2ab76ef..ec5fd4a76 100644 --- a/loki/bulk/item.py +++ b/loki/bulk/item.py @@ -239,9 +239,11 @@ def _get_procedure_item(self, proc_symbol, item_cache, config): # hoping the corresponding TypeDefItem has already been created, which it # will probably not have been. Therefore, we require the underlying Fortran to # have fully-qualified imports instead - raise RuntimeError( - f'Unable to find the module declaring {type_name}. Import via `USE` without `ONLY`?' - ) + msg = f'Unable to find the module declaring {type_name}. Import via `USE` without `ONLY`?' + if self.strict: + raise RuntimeError(msg) + warning(msg) + return None item_name = f'{scope_name}#{type_name}%{"%".join(proc_symbol.name_parts[1:])}'.lower() return self._get_or_create_item(ProcedureBindingItem, item_name, item_cache, scope_name, config) @@ -260,6 +262,11 @@ def _get_procedure_item(self, proc_symbol, item_cache, config): item_name = f'#{proc_name}'.lower() if config and config.is_disabled(item_name): return None + if item_name not in item_cache: + if self.strict: + raise RuntimeError(f'Procedure {item_name} not found in item_cache.') + warning(f'Procedure {item_name} not found in item_cache.') + return None return item_cache[item_name] @classmethod @@ -358,7 +365,10 @@ def _create_from_ir(self, node, item_cache, config): # depend only on the imported module scope_name = node.module.lower() if scope_name not in item_cache: - raise RuntimeError(f'Module {scope_name} not found in item_cache') + if self.strict: + raise RuntimeError(f'Module {scope_name} not found in item_cache') + warning(f'Module {scope_name} not found in item_cache') + return None scope_item = item_cache[scope_name] if node.symbols: scope_definitions = { From 1f3fefb45ebbe3d94668484e8494b4bcde2e6b9b Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Tue, 23 May 2023 11:43:09 +0100 Subject: [PATCH 23/23] 
Python 3.8 compatibility --- loki/bulk/item.py | 22 ++++++++++++++++++++++ tests/test_batch.py | 9 ++++++--- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/loki/bulk/item.py b/loki/bulk/item.py index ec5fd4a76..c5aa5b833 100644 --- a/loki/bulk/item.py +++ b/loki/bulk/item.py @@ -1028,6 +1028,28 @@ def ir(self): return decl raise RuntimeError(f'Declaration for {local_name} cannot be found in {self.scope_name}') + # Below properties are only here to appease the Linter and become + # redundant once the Item base class has been cleaned up + @property + def calls(self): + pass + + @property + def function_interfaces(self): + pass + + @property + def imports(self): + pass + + @property + def members(self): + pass + + @property + def routine(self): + pass + class SubroutineItem(Item): """ diff --git a/tests/test_batch.py b/tests/test_batch.py index 8730b216e..bda0bede2 100644 --- a/tests/test_batch.py +++ b/tests/test_batch.py @@ -80,7 +80,10 @@ def fixture_mod_proc_expected_dependencies(): @pytest.fixture(name='expected_dependencies') def fixture_expected_dependencies(comp1_expected_dependencies, mod_proc_expected_dependencies): - return comp1_expected_dependencies | mod_proc_expected_dependencies + dependencies = {} + dependencies.update(comp1_expected_dependencies) + dependencies.update(mod_proc_expected_dependencies) + return dependencies @pytest.fixture(name='no_expected_dependencies') @@ -426,13 +429,13 @@ def test_procedure_item_with_config(here, config, expected_dependencies): # We need to have suitable dependency modules in the cache to spawn the dependency items item_cache = {item.name: item} - item_cache |= { + item_cache.update({ (i := get_item(ModuleItem, proj/path, name, RegexParserClass.ProgramUnitClass)).name: i for path, name in [ ('module/t_mod.F90', 't_mod'), ('module/a_mod.F90', 'a_mod'), ('module/b_mod.F90', 'b_mod'), ('headers/header_mod.F90', 'header_mod') ] - } + }) scheduler_config = SchedulerConfig.from_dict(config) assert 
item.create_dependency_items(item_cache=item_cache, config=scheduler_config) == expected_dependencies