diff --git a/.gitignore b/.gitignore index 0fd8c5b122..0836d14982 100644 --- a/.gitignore +++ b/.gitignore @@ -29,3 +29,5 @@ src/*.egg-info .venv cov.xml .coverage.* +*.psycache +__pycache__
diff --git a/changelog b/changelog index 3afe1d6dd3..8fb6369f5e 100644 --- a/changelog +++ b/changelog @@ -26,6 +26,9 @@ 9) PR #2842 for #2841. Update all copyright date ranges for 2025. + 10) PR #2810. Adds caching of the fparser2 parse tree to FileInfo. It is + disabled by default. + release 3.0.0 6th of December 2024 1) PR #2477 for #2463. Add support for Fortran Namelist statements.
diff --git a/doc/developer_guide/module_manager.rst b/doc/developer_guide/module_manager.rst index 1787ddf33c..bcad26a6c3 100644 --- a/doc/developer_guide/module_manager.rst +++ b/doc/developer_guide/module_manager.rst @@ -41,6 +41,8 @@ .. _module_manager: + + Module Manager ############## @@ -135,3 +137,65 @@ which prints the filenames of all modules used in ``tl_testkern_mod``: Module: constants_mod constants_mod.f90 Module: fs_continuity_mod fs_continuity_mod.f90 Module: kernel_mod kernel_mod.f90 + + + +FileInfo +======== + +FileInfo is a class that stores information about Fortran source files. + +This information can include: + +- The source code itself +- The fparser parse tree +- The PSyIR tree + +All of this information is gathered in this single class because this also +allows it to be cached; see the next section. + + + +Caching +======= + +The `ModuleManager` and `FileInfo` classes support caching the +fparser tree representation of the source code to file. +(Support for caching the PSyIR is planned.) + +This caching has to be **explicitly enabled** in the constructor +of `ModuleManager`: + + +.. testcode:: + + mod_manager = ModuleManager.get(use_caching=True) + + +Most of the time spent generating the PSyIR currently goes into creating the +fparser tree. Consequently, caching the fparser tree gives significant +speed-ups when reading and parsing the source code +of modules. + +The cache file is named after the source file, +but with the file extension replaced by `.psycache`. E.g., the cache file +for the source file `foo.f90` will be called `foo.psycache`. + +The caching algorithm used to obtain the fparser tree or the PSyIR is briefly as follows: + +- If the fparser tree / PSyIR has already been created: RETURN it +- If the source code has not yet been read: + + - Read the content of the file + - Compute the checksum of the source +- Read the cache file if it exists: + + - If the checksum stored in the cache matches that of the source: + + - Load the fparser tree / PSyIR from the cache file and RETURN it +- Create the fparser tree / PSyIR from the source code +- Save the cache file IF it was not loaded before: + + - Update the cache information + - Store it to the cache file +- RETURN the fparser tree / PSyIR
diff --git a/doc/user_guide/configuration.rst b/doc/user_guide/configuration.rst index 7d059f5dbd..d4b3cc92be 100644 --- a/doc/user_guide/configuration.rst +++ b/doc/user_guide/configuration.rst @@ -76,6 +76,10 @@ locations searched is now: 2. ``/share/psyclone/`` 3. ``${HOME}/.local/share/psyclone/`` +As a last resort, the location +``/config/`` +is searched in case PSyclone was installed in editable mode. + Note that for developers a slightly different configuration handling is implemented, see :ref:`dev_guide:dev_configuration` for details. 
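For illustration, the caching algorithm documented in the developer-guide section above can be sketched in plain Python. This is a simplified, hypothetical sketch and not part of the patch: it only mirrors the documented behaviour (MD5 checksum of the source, a pickled `.psycache` file next to the source, fparser2 as the parser); the function name `fparser_tree_with_cache` is illustrative and omits details such as the exclusive-write handling of the real implementation.

import hashlib
import os
import pickle

from fparser.common.readfortran import FortranStringReader
from fparser.two.parser import ParserFactory


def fparser_tree_with_cache(source_path):
    # Illustrative sketch only: mirrors the caching behaviour described above.
    cache_path = os.path.splitext(source_path)[0] + ".psycache"

    # Read the source and compute its checksum.
    with open(source_path, "r", encoding="utf-8", errors="ignore") as fin:
        source = fin.read()
    checksum = hashlib.md5(source.encode()).hexdigest()

    # Use the cache file if it exists and its checksum matches the source.
    try:
        with open(cache_path, "rb") as fin:
            cached_checksum, tree = pickle.load(fin)
        if cached_checksum == checksum:
            return tree
    except Exception:
        pass  # Missing or corrupt cache file: fall through and re-parse.

    # Cache miss: create the fparser tree and refresh the cache file.
    parser = ParserFactory().create(std="f2008")
    tree = parser(FortranStringReader(source))
    with open(cache_path, "wb") as fout:
        pickle.dump((checksum, tree), fout)
    return tree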
diff --git a/psyclone.pdf b/psyclone.pdf index 461fa7dacf..56b8d4e313 100644 Binary files a/psyclone.pdf and b/psyclone.pdf differ diff --git a/src/psyclone/configuration.py b/src/psyclone/configuration.py index 94708405e8..5cf728a286 100644 --- a/src/psyclone/configuration.py +++ b/src/psyclone/configuration.py @@ -415,6 +415,7 @@ def find_file(): - ${HOME}/.local/share/psyclone/ - /share/psyclone/ - /share/psyclone/ + - /config/ :returns: the fully-qualified path to the configuration file :rtype: str @@ -449,9 +450,18 @@ def find_file(): if not within_virtual_env(): # 4. /share/psyclone/ _file_paths.append(share_dir) + # 5. /share/psyclone/ _file_paths.extend(pkg_share_dir) + # 6. /config/ + # Search for configuration file relative to this source file + dev_dir_tmp = os.path.dirname(__file__) + dev_dir_tmp = os.path.split(dev_dir_tmp)[0] # Go down one level + dev_dir_tmp = os.path.split(dev_dir_tmp)[0] # Go down another level + dev_path = os.path.join(dev_dir_tmp, "config") + _file_paths.append(dev_path) + for cfile in [os.path.join(cdir, _FILE_NAME) for cdir in _file_paths]: if os.path.isfile(cfile): return cfile diff --git a/src/psyclone/parse/__init__.py b/src/psyclone/parse/__init__.py index 76c3f98ed3..59ccfe2594 100644 --- a/src/psyclone/parse/__init__.py +++ b/src/psyclone/parse/__init__.py @@ -32,11 +32,12 @@ # POSSIBILITY OF SUCH DAMAGE. # ----------------------------------------------------------------------------- # Author: J. Henrichs, Bureau of Meteorology +# Modified: M. Schreiber, Univ. Grenoble Alpes / Inria / Lab. Jean-Kuntzmann '''This directory contains classes related to parsing Fortran. ''' -from psyclone.parse.file_info import FileInfo +from psyclone.parse.file_info import FileInfo, FileInfoFParserError from psyclone.parse.module_info import ModuleInfo, ModuleInfoError from psyclone.parse.module_manager import ModuleManager @@ -44,6 +45,7 @@ # For AutoAPI documentation generation. __all__ = [ 'FileInfo', + 'FileInfoFParserError', 'ModuleInfo', 'ModuleInfoError', 'ModuleManager' diff --git a/src/psyclone/parse/file_info.py b/src/psyclone/parse/file_info.py index e93dbe2d71..5cfd5c81ac 100644 --- a/src/psyclone/parse/file_info.py +++ b/src/psyclone/parse/file_info.py @@ -32,30 +32,105 @@ # POSSIBILITY OF SUCH DAMAGE. # ----------------------------------------------------------------------------- # Author: A. R. Porter, STFC Daresbury Laboratory. +# Modifications: M. Schreiber, Univ. Grenoble Alpes '''This module contains the FileInfo class. ''' +import hashlib +import copy import os +import pickle + +from fparser.two import Fortran2003 +from fparser.two.parser import ParserFactory + +from psyclone.psyir.frontend.fortran import FortranStringReader +from psyclone.configuration import Config +from psyclone.psyir.nodes import FileContainer +from psyclone.errors import PSycloneError +from psyclone.psyir.frontend.fparser2 import Fparser2Reader + + +class FileInfoFParserError(PSycloneError): + """Triggered when generation of FParser tree failed""" + + def __init__(self, value: str): + super().__init__(value) + self.value = "FileInfoFParserError: " + str(value) + + +class _CacheFileInfo: + """Class which is used to store all information + which can be cached to a file and read back from a file. 
+ """ + def __init__(self): + # Hash sum + self._source_code_hash_sum: hashlib._Hash = None + + # Fparser tree + self._fparser_tree: Fortran2003.Program = None + + # Psyir node + self._psyir_node: FileContainer = None -# ============================================================================ class FileInfo: - '''This class stores mostly cached information about files: it stores - the original filename and, if requested, it will read the file and then - cache the (plain text) contents. + """This class stores mostly cached information about source code: + - it stores the original filename + - it will read the source of the file and cache it + - it will parse it with fparser and cache it + - it will construct the PSyIR (depends on TODO #2786 "and cache it") - :param str filename: the name of the source file (including path) that this - object holds information on. + :param filepath: Path to the file that this + object holds information on. Can also be set to 'None' in case of + providing fparser / PSyIR node in a different way. + :param use_caching: Use caching of intermediate representations - ''' - def __init__(self, filename): - self._filename = filename - # A cache for the source code: - self._source_code = None + """ + def __init__(self, filepath: str, use_caching: bool = False): + + # Full path to file + self._filename: str = filepath + + # Use cache features + self._use_caching: bool = use_caching + + # Source code: + self._source_code: str = None + + # Source code hash sum: + self._source_code_hash_sum: hashlib._Hash = None + + # Fparser node + self._fparser_tree: Fortran2003.Program = None + + # Flag indicating that, based on a previous attempt, + # the fparser tree can't be generated due to an error + self._fparser_tree_triggers_error: bool = False + + # Psyir node + self._psyir_node: FileContainer = None + + # Single cache file + (path, ext) = os.path.splitext(self._filename) + self._filepath_cache = path + ".psycache" + + # This reference to `_CacheFileInfo` is created when loading + # cached information from a cache file. + # In case the checksums mismatch, no object will be referenced. + # Consequently, this object will always have a checksum matching + # the one from the source code. + self._cache_data_load: _CacheFileInfo = None + + # This reference is used whenever writing cache data to the + # persistent storage. + # It will also be partly updated if the `psyir` or + # `fparser tree` was created in the meantime and a cache update + # is requested. + self._cache_data_save: _CacheFileInfo = None - # ------------------------------------------------------------------------ @property def basename(self): ''' @@ -63,10 +138,11 @@ def basename(self): that this FileInfo object represents. :rtype: str ''' + # Remove the path from the filename. - bname = os.path.basename(self._filename) + basename = os.path.basename(self._filename) # splitext returns (root, ext) and it's `root` that we want. - return os.path.splitext(bname)[0] + return os.path.splitext(basename)[0] # ------------------------------------------------------------------------ @property @@ -77,10 +153,8 @@ def filename(self): ''' return self._filename - # ------------------------------------------------------------------------ - @property - def contents(self): - '''Returns the contents of the file. The first time, it + def get_source_code(self, verbose: bool = False) -> str: + '''Returns the source code of the file. The first time, it will be read from the file, but the data is then cached. 
If any decoding errors are encountered then the associated character(s) @@ -88,16 +162,317 @@ def contents(self): Fortran source and the only way such characters can appear is if they are in comments. + :param verbose: If `True`, produce some verbose output + :returns: the contents of the file (utf-8 encoding). - :rtype: str ''' - if self._source_code is None: + if self._source_code: + return self._source_code + + if verbose: + # TODO #11: Use logging for this + print( + f"- Source file '{self._filename}': " + f"Loading source code" + ) + + try: # Specifying errors='ignore' simply skips any characters that # result in decoding errors. (Comments in a code may contain all # sorts of weird things.) - with open(self._filename, "r", encoding='utf-8', - errors='ignore') as file_in: + with open( + self._filename, "r", encoding="utf-8", errors="ignore" + ) as file_in: self._source_code = file_in.read() + except FileNotFoundError as err: + raise FileNotFoundError( + f"FileInfo: No such file or directory '{self._filename}'." + ) from err + + if self._use_caching: + # Only update if caching is used. + # Compute hash sum which will be used to + # check cache of fparser tree + self._source_code_hash_sum = hashlib.md5( + self._source_code.encode() + ).hexdigest() return self._source_code + + def _cache_load( + self, + verbose: bool = False, + ) -> _CacheFileInfo: + """Load fparser parse tree from the cache file if possible. + This also checks for matching checksums after loading the data + from the cache. + The checksum is based solely on a hashsum of the source code itself, + see code below. + + :param verbose: Produce some verbose output + """ + + if not self._use_caching: + return + + # Load the source code in case it's not yet loaded. + # This also fills in the hash sum + self.get_source_code() + + # Check whether cache was already loaded + if self._cache_data_load is not None: + return self._cache_data_load + + # Load cache file. + # Warning: There could be race conditions, e.g., in parallel builds. + # In the worst case some content is read which is incomplete or + # basically garbage. This will lead either to an Exception from the + # unpickling or a non-matching checksum which is both caught below. + try: + filehandler = open(self._filepath_cache, "rb") + except FileNotFoundError: + if verbose: + # TODO #11: Use logging for this + print(f" - No cache file '{self._filepath_cache}' found") + return None + + # Unpack cache file + try: + cache: _CacheFileInfo = pickle.load(filehandler) + except Exception as ex: + print(f" - Error while reading cache file - ignoring: {str(ex)}") + return None + + # Verify checksums + if cache._source_code_hash_sum != self._source_code_hash_sum: + if verbose: + # TODO #11: Use logging for this + print( + f" - Cache hashsum mismatch: " + f"source {self._source_code_hash_sum} " + f"vs. cache {cache._source_code_hash_sum}" + ) + return None + + self._cache_data_load = cache + + def _cache_save( + self, + verbose: bool = False, + ) -> None: + """Save the following elements to a cache file: + - hash sum of code + - fparser tree + - in future work, potentially also psyir nodes too + (requires TODO #2786). 
+ + :param verbose: Produce some verbose output + """ + + if not self._use_caching: + return None + + if self._source_code_hash_sum is None: + # Nothing to cache + return None + + cache_updated = False + if self._cache_data_save is None: + # Cache doesn't exist => prepare data to write to file + self._cache_data_save = _CacheFileInfo() + self._cache_data_save._source_code_hash_sum = ( + self._source_code_hash_sum) + + if ( + self._cache_data_save._fparser_tree is None + and self._fparser_tree is not None + ): + # No fparser tree was loaded so far into the cache object + # AND + # an fparser tree was loaded and stored to Fileinfo. + # + # Consequently, we cache the fparser tree to the cache file. + # We create a deepcopy of this fparser tree to ensure that we cache + # the original tree and not a modified fparser tree node if the + # cache is updated (based on potentially future work of also + # caching the PSyIR) + + self._cache_data_save._fparser_tree = \ + copy.deepcopy(self._fparser_tree) + cache_updated = True + + if self._cache_data_save._psyir_node is None and ( + self._psyir_node is not None): + # TODO #2786: Serialization of psyir tree not possible + # + # E.g., this call fails: copy.deepcopy(self._psyir_node) + # + # Uncomment this code if serialization of psyir tree is + # possible and it will work. + # self._cache._psyir_node = copy.deepcopy(self._psyir_node) + # cache_updated = True + pass + + if not cache_updated: + return None + + # Open cache file + try: + # Atomically attempt to open the new kernel file (in case + # this is part of a parallel build) + # We first remove the cache file and then open it. + # If the file exists, it throws an exception. + # This is not a perfect solution, but avoids parallel + # writing access of the same file. + + # We first remove a potentially existing file + try: + os.remove(self._filepath_cache) + except FileNotFoundError: + pass + + # Then we open it in exclusive mode. + # If it already exists, an exception would be raised. + fd = os.open(self._filepath_cache, + os.O_CREAT | os.O_WRONLY | os.O_EXCL) + + filehandler = os.fdopen(fd, "wb") + except Exception as err: + if verbose: + # TODO #11: Use logging for this + print(" - Unable to write to cache file: " + str(err)) + return None + + # Dump to cache file + try: + pickle.dump(self._cache_data_save, filehandler) + except Exception as err: + # Invalidate cache + self._cache_data_save = None + print("Error while storing cache data - ignoring: " + str(err)) + return None + + if verbose: + print( + f" - Cache file updated with " + f"hashsum '{self._cache_data_save._source_code_hash_sum}" + ) + + def get_fparser_tree( + self, + verbose: bool = False, + save_to_cache_if_cache_active: bool = True + ) -> Fortran2003.Program: + """Returns the fparser Fortran2003.Program representation of the + source code (including Fortran2008). + + :param save_to_cache_if_cache_active: Cache is updated if fparser was + not loaded from cache. + :param verbose: Produce some verbose output + + :returns: fparser representation. + + :raises FileInfoFParserError: if fparser had issues + """ + if self._fparser_tree is not None: + return self._fparser_tree + + if self._fparser_tree_triggers_error: + # Raises an exception if we were not able to create the + # fparser tree before. 
+ raise FileInfoFParserError( + "Failed to create fparser tree (previous attempt failed)" + ) + + if verbose: + # TODO #11: Use logging for this + print(f"- Source file '{self._filename}': " f"Running fparser") + + try: + source_code = self.get_source_code() + except FileNotFoundError as err: + raise FileInfoFParserError( + f"File '{self._filename}' not found:\n{str(err)}") + + # Check for cache + self._cache_load(verbose=verbose) + + if self._cache_data_load is not None: + if self._cache_data_load._fparser_tree is not None: + if verbose: + # TODO #11: Use logging for this + print( + f" - Using cache of fparser tree with hashsum" + f" {self._cache_data_load._source_code_hash_sum}" + ) + + # Use cached version + self._fparser_tree = self._cache_data_load._fparser_tree + return self._fparser_tree + + try: + reader = FortranStringReader( + source_code, include_dirs=Config.get().include_paths + ) + parser = ParserFactory().create(std="f2008") + self._fparser_tree = parser(reader) + + except Exception as err: + self._fparser_tree_triggers_error = True + raise FileInfoFParserError( + "Failed to create fparser tree: " + str(err) + ) from err + + # We directly call the cache saving routine here in case that the + # fparser tree will be modified later on. + if save_to_cache_if_cache_active: + self._cache_save(verbose=verbose) + + return self._fparser_tree + + def get_psyir(self, verbose: bool = False) -> FileContainer: + """Returns the PSyIR FileContainer of the file. + + :param verbose: Produce some verbose output + + :returns: PSyIR file container node. + + """ + if self._psyir_node is not None: + return self._psyir_node + + # Check for cache + self._cache_load(verbose=verbose) + + if self._cache_data_load is not None: + if self._cache_data_load._psyir_node is not None: + # Use cached version + if verbose: + # TODO #11: Use logging for this + print(" - Using cache of PSyIR") + + self._psyir_node = self._cache_data_load._psyir_node + return self._psyir_node + + if verbose: + # TODO #11: Use logging for this + print(f" - Running psyir for '{self._filename}'") + + # First, we get the fparser tree + fparse_tree = self.get_fparser_tree( + verbose=verbose, + # TODO #2786: If this TODO is resolved, set this to False + # and uncomment the self._cache_save below. + save_to_cache_if_cache_active=True + ) + + # We generate PSyIR from the fparser tree + _, filename = os.path.split(self.filename) + processor = self._processor = Fparser2Reader() + self._psyir_node = processor.generate_psyir(fparse_tree, filename) + + # TODO #2786: Uncomment if psyir nodes are serializable + # self._cache_save(verbose=verbose) + + return self._psyir_node diff --git a/src/psyclone/parse/module_info.py b/src/psyclone/parse/module_info.py index 2060bad635..8db153a146 100644 --- a/src/psyclone/parse/module_info.py +++ b/src/psyclone/parse/module_info.py @@ -33,92 +33,102 @@ # ----------------------------------------------------------------------------- # Authors: J. Henrichs, Bureau of Meteorology # A. R. Porter, STFC Daresbury Laboratory +# Modified: M. Schreiber, Univ. Grenoble Alpes / Inria / Lab. Jean-Kuntzmann '''This module contains the ModuleInfo class, which is used to store -and cache information about a module. +information about a module. + +It makes use of the FileInfo class which is then used for caching. 
''' -from fparser.common.readfortran import FortranStringReader +from typing import Dict, List, Union + from fparser.two import Fortran2003 -from fparser.two.parser import ParserFactory -from fparser.two.utils import FortranSyntaxError, walk +from fparser.two.utils import walk -from psyclone.configuration import Config from psyclone.errors import InternalError, PSycloneError, GenerationError -from psyclone.psyir.frontend.fparser2 import Fparser2Reader from psyclone.psyir.nodes import Container -from psyclone.psyir.symbols import SymbolError +from psyclone.psyir.symbols import Symbol +from psyclone.parse import FileInfo, FileInfoFParserError + +from fparser.two.Fortran2003 import Program -# ============================================================================ class ModuleInfoError(PSycloneError): - ''' + """ PSyclone-specific exception for use when an error with the module manager happens - typically indicating that some module information cannot be found. :param str value: the message associated with the error. - ''' + """ def __init__(self, value): PSycloneError.__init__(self, value) - self.value = "ModuleInfo error: "+str(value) + self.value = "ModuleInfoError: " + str(value) -# ============================================================================ class ModuleInfo: # pylint: disable=too-many-instance-attributes - '''This class stores mostly cached information about a Fortran module. - It stores a FileInfo object holding details on the original source file. - If required it will parse this file and then cache the fparser2 parse tree. - Similarly, it will also process this parse tree to - create the corresponding PSyIR which is also then cached. - - :param str name: the module name. - :param finfo: object holding information on the source file which defines - this module. - :type finfo: :py:class:`psyclone.parse.FileInfo` + '''This class stores mostly memory-cached information about a Fortran + module. + It stores a FileInfo object holding details on the original source file, + the fparser2 parse tree and the PSyIR. + Memory-caching is always used in FileInfo with an opt-in feature of + caching the fparser parse tree to the file system. + + :param module_name: Name of the module + :param file_info: FileInfo object. The fparser tree, PSyIR + and module information will be loaded from this file if it's + not yet available. ''' - def __init__(self, name, finfo): - self._name = name.lower() - self._file_info = finfo + def __init__( + self, + module_name: str, + file_info: FileInfo + ): + if not isinstance(module_name, str): + raise TypeError("Expected type 'str' for argument 'module_name'") + + if not isinstance(file_info, FileInfo): + raise TypeError("Expected type 'FileInfo' for" + " argument 'file_info'") - # A cache for the fparser tree - self._parse_tree = None + self._name = module_name.lower() - # Whether we've attempted to parse the source. - self._parse_attempted = False + # File handler including fparser and psyir representation + self._file_info: FileInfo = file_info - # A cache for the PSyIR representation - self._psyir = None + # The PSyIR representation + self._psyir_container_node: Container = None # A cache for the module dependencies: this is just a set - # of all modules used by this module. Type: set[str] - self._used_modules = None + # of all modules USEd by this module. 
+ # We use an (ordered) list to preserve the order of the + # used modules + self._used_module_names: List[str] = None # This is a dictionary containing the sets of symbols imported from # each module, indexed by the module names: dict[str, set[str]]. - self._used_symbols_from_module = None - - self._processor = Fparser2Reader() + self._map_module_name_to_used_symbols: Dict[str, set[str]] = None # ------------------------------------------------------------------------ @property - def name(self): + def name(self) -> str: ''':returns: the name of this module. - :rtype: str ''' return self._name # ------------------------------------------------------------------------ @property - def filename(self): - ''':returns: the filename that contains the source code for this + def filename(self) -> str: + ''' + + :returns: the filepath that contains the source code for this module. - :rtype: str ''' return self._file_info.filename @@ -135,63 +145,53 @@ def get_source_code(self): ''' try: - return self._file_info.contents + return self._file_info.get_source_code() except FileNotFoundError as err: raise ModuleInfoError( f"Could not find file '{self._file_info.filename}' when trying" f" to read source code for module '{self._name}'") from err # ------------------------------------------------------------------------ - def get_parse_tree(self): - '''Returns the fparser AST for this module. The first time, the file - will be parsed by fparser using the Fortran 2008 standard. The AST is - then cached for any future uses. + def get_fparser_tree(self) -> Union[Program, None]: + '''Returns the fparser AST for this module. - :returns: the fparser AST for this module. - :rtype: :py:class:`fparser.two.Fortran2003.Program` + :returns: The fparser AST for this module. + :raises ModuleInfoError: When errors have been triggered while + creating the fparser tree. ''' - if not self._parse_attempted: - # This way we avoid that any other function might trigger to - # parse this file again (in case of parsing errors). - self._parse_attempted = True - - reader = FortranStringReader( - self.get_source_code(), - include_dirs=Config.get().include_paths) - parser = ParserFactory().create(std="f2008") - self._parse_tree = parser(reader) + try: + return self._file_info.get_fparser_tree() - return self._parse_tree + except FileInfoFParserError as err: + raise ModuleInfoError( + f"Error(s) getting fparser tree of file '{self.filename}'" + f" for module '{self.name}':\n" + + str(err)) from err # ------------------------------------------------------------------------ def _extract_import_information(self): '''This internal function analyses a given module source file and caches which modules are imported (in self._used_modules), and which symbol is imported from each of these modules (in - self._used_symbols_from_module). + self._used_symbols_from_module_name). 
''' - # Initialise the caches: - self._used_modules = set() - self._used_symbols_from_module = {} - try: - parse_tree = self.get_parse_tree() - except FortranSyntaxError: - # TODO #11: Add proper logging - # TODO #2120: Handle error - print(f"[ModuleInfo._extract_import_information] Syntax error " - f"parsing '{self.filename} - ignored") - # Hide syntax errors - return + # Initialise the caches + self._used_module_names = [] + self._map_module_name_to_used_symbols = {} + + parse_tree = self.get_fparser_tree() + for use in walk(parse_tree, Fortran2003.Use_Stmt): # Ignore intrinsic modules: if str(use.items[0]) == "INTRINSIC": continue mod_name = str(use.items[2]) - self._used_modules.add(mod_name) + if mod_name not in self._used_module_names: + self._used_module_names.append(mod_name) all_symbols = set() only_list = use.items[4] @@ -202,10 +202,10 @@ def _extract_import_information(self): for symbol in only_list.children: all_symbols.add(str(symbol)) - self._used_symbols_from_module[mod_name] = all_symbols + self._map_module_name_to_used_symbols[mod_name] = all_symbols # ------------------------------------------------------------------------ - def get_used_modules(self): + def get_used_modules(self) -> List[str]: '''This function returns a set of all modules `used` in this module. Fortran `intrinsic` modules will be ignored. The information is based on the fparser parse tree of the module (since fparser can @@ -216,10 +216,10 @@ def get_used_modules(self): :rtype: set[str] ''' - if self._used_modules is None: + if self._used_module_names is None: self._extract_import_information() - return self._used_modules + return self._used_module_names # ------------------------------------------------------------------------ def get_used_symbols_from_modules(self): @@ -233,12 +233,11 @@ def get_used_symbols_from_modules(self): :rtype: dict[str, set[str]] ''' - if self._used_symbols_from_module is None: + if self._map_module_name_to_used_symbols is None: self._extract_import_information() - return self._used_symbols_from_module + return self._map_module_name_to_used_symbols - # ------------------------------------------------------------------------ def get_psyir(self): '''Returns the PSyIR representation of this module. This is based on the fparser tree (see get_parse_tree), and the information is @@ -255,33 +254,31 @@ def get_psyir(self): exist in the PSyIR. ''' - if self._psyir is None: + if self._psyir_container_node is None: try: - ptree = self.get_parse_tree() - except FortranSyntaxError as err: + self._file_info.get_fparser_tree() + except FileInfoFParserError as err: # TODO #11: Add proper logging print(f"Error parsing '{self.filename}': '{err}'") return None - if not ptree: - # TODO #11: Add proper logging - print(f"Empty parse tree returned for '{self.filename}'") - return None + try: - self._psyir = self._processor.generate_psyir(ptree) - except (KeyError, SymbolError, InternalError, GenerationError) \ - as err: + self._psyir_container_node = self._file_info.get_psyir() + except ( + GenerationError, + FileInfoFParserError) as err: # TODO #11: Add proper logging print(f"Error trying to create PSyIR for '{self.filename}': " f"'{err}'") return None # Return the Container with the correct name. 
- for cntr in self._psyir.walk(Container): - if cntr.name.lower() == self.name: - return cntr + for container_node in self._psyir_container_node.walk(Container): + if container_node.name.lower() == self.name: + return container_node # We failed to find the Container - double-check the parse tree - for mod_stmt in walk(self.get_parse_tree(), Fortran2003.Module_Stmt): + for mod_stmt in walk(self.get_fparser_tree(), Fortran2003.Module_Stmt): if mod_stmt.children[1].string.lower() == self.name: # The module exists but we couldn't create PSyIR for it. # TODO #11: Add proper logging @@ -294,28 +291,48 @@ def get_psyir(self): f"module named '{self.name}'") # ------------------------------------------------------------------------ - def get_symbol(self, name): + def get_symbol(self, name: str) -> Union[Symbol, None]: ''' Gets the PSyIR Symbol with the supplied name from the Container - representing this Module (if available). + representing this Fortran module (if available). This utility mainly exists to insulate the user from having to check that a Container has been successfully created for this module (it might not have been if the source of the module cannot be found or cannot be parsed) and that it contains the specified Symbol. - :param str name: the name of the symbol to get from this module. + :param name: the name of the symbol to get from this module. :returns: the Symbol with the supplied name if the Container has been successfully created and contains such a symbol and None otherwise. - :rtype: :py:class:`psyclone.psyir.symbols.Symbol` | None ''' - container = self.get_psyir() - if not container: + try: + container: Container = self.get_psyir() + + # Handle "alternative" implementation of get_psyir_container_mode" + if container is None: + # PSyIR could not be obtained so cannot search for Symbol + return None + + except (FileNotFoundError, FileInfoFParserError, GenerationError): return None + try: return container.symbol_table.lookup(name) except KeyError: return None + + def view_tree(self, indent=""): + """ + Show the module information with markdown style in a tree-like + structure supporting indentation. + + :param str indent: the string to use for indentation. + """ + retstr = "" + retstr += f"{indent}- name: '{self.name}'\n" + retstr += (f"{indent}- used_module_names:" + f" {self.get_used_modules()}\n") + return retstr diff --git a/src/psyclone/parse/module_manager.py b/src/psyclone/parse/module_manager.py index b10fbcdf99..9f9076694f 100644 --- a/src/psyclone/parse/module_manager.py +++ b/src/psyclone/parse/module_manager.py @@ -45,14 +45,17 @@ from psyclone.errors import InternalError from psyclone.parse.file_info import FileInfo -from psyclone.parse.module_info import ModuleInfo +from psyclone.parse.module_info import ModuleInfo, ModuleInfoError class ModuleManager: '''This class implements a singleton that manages module dependencies. + :param use_caching: Whether to use (`True`) or + disable (`False`) caching ''' + # Class variable to store the singleton instance _instance = None @@ -62,23 +65,36 @@ class ModuleManager: # ------------------------------------------------------------------------ @staticmethod - def get(): + def get(use_caching: bool = None): '''Static function that if necessary creates and returns the singleton ModuleManager instance. + :param use_caching: If `True`, a file-based caching of the fparser + tree will be used. This can significantly accelerate obtaining + a PSyIR from a source file. 
+ For parallel builds, parallel race conditions to the cache file + can happen, but this shouldn't lead to wrong results. However, + that's untested so far. + ''' if not ModuleManager._instance: - ModuleManager._instance = ModuleManager() + ModuleManager._instance = ModuleManager(use_caching) return ModuleManager._instance # ------------------------------------------------------------------------ - def __init__(self): + def __init__( + self, + use_caching: bool = None + ): if ModuleManager._instance is not None: raise InternalError("You need to use 'ModuleManager.get()' " "to get the singleton instance.") + # Disable caching by default + self._use_caching = use_caching if use_caching is not None else False + self._modules = {} self._visited_files = {} @@ -149,7 +165,8 @@ def _add_all_files_from_dir(self, directory): full_path = os.path.join(directory, entry.name) if full_path in self._visited_files: continue - self._visited_files[full_path] = FileInfo(full_path) + self._visited_files[full_path] = \ + FileInfo(full_path, use_caching=self._use_caching) new_files.append(self._visited_files[full_path]) return new_files @@ -170,6 +187,7 @@ def _find_module_in_files(self, name, file_list): ''' mod_info = None for finfo in file_list: + finfo: FileInfo # We only proceed to read a file to check for a module if its # name is sufficiently similar to that of the module. score = SequenceMatcher(None, @@ -227,7 +245,7 @@ def get_module_info(self, module_name): # First check if we have already seen this module. We only end the # search early if the file we've found does not require pre-processing # (i.e. has a .f90 suffix). - mod_info = self._modules.get(mod_lower, None) + mod_info: ModuleInfo = self._modules.get(mod_lower, None) if mod_info and mod_info.filename.endswith(".f90"): return mod_info old_mod_info = mod_info @@ -260,7 +278,7 @@ def get_module_info(self, module_name): f"command line option.") # ------------------------------------------------------------------------ - def get_modules_in_file(self, finfo): + def get_modules_in_file(self, finfo: FileInfo): ''' Uses a regex search to find all modules defined in the file with the supplied name. @@ -276,8 +294,9 @@ def get_modules_in_file(self, finfo): # could be defeated by e.g. # module & # my_mod - # `finfo.contents` will read the file if it hasn't already been cached. - mod_names = self._module_pattern.findall(finfo.contents) + # `finfo.get_source_code()` will read the file if it hasn't already + # been cached. + mod_names = self._module_pattern.findall(finfo.get_source_code()) return [name.lower() for name in mod_names] @@ -328,7 +347,9 @@ def get_all_dependencies_recursively(self, all_mods): continue try: mod_deps = self.get_module_info(module).get_used_modules() - except FileNotFoundError: + # Convert to set since we continue with a set + mod_deps = set(mod_deps) + except (FileNotFoundError, ModuleInfoError): if module not in not_found: # We don't have any information about this module, # ignore it. 
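As a usage note for the `ModuleManager` changes above: with the opt-in flag enabled, every `FileInfo` created by the manager writes and reuses a `.psycache` file next to each parsed source file. A short, hypothetical example (the search directory `src` and the module name `foo_mod` are placeholders, not part of the patch):

from psyclone.parse import ModuleManager

# Opt in to file-based caching of the fparser2 parse trees.
mod_man = ModuleManager.get(use_caching=True)
mod_man.add_search_path("src", recursive=True)

# The first run parses foo_mod.f90 with fparser2 and writes foo_mod.psycache;
# later runs with an unchanged source file reuse the cached tree.
mod_info = mod_man.get_module_info("foo_mod")
print(mod_info.get_used_modules())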
diff --git a/src/psyclone/psyir/nodes/call.py b/src/psyclone/psyir/nodes/call.py index 96920dba7a..f49ce59577 100644 --- a/src/psyclone/psyir/nodes/call.py +++ b/src/psyclone/psyir/nodes/call.py @@ -766,7 +766,6 @@ def get_callee( ''' routine_list = self.get_callees() - assert len(routine_list) != 0 err_info_list = [] diff --git a/src/psyclone/tests/config_test.py b/src/psyclone/tests/config_test.py index 7367c9b718..433dcb6a53 100644 --- a/src/psyclone/tests/config_test.py +++ b/src/psyclone/tests/config_test.py @@ -262,15 +262,24 @@ def test_search_path(monkeypatch): pkg_share_idx = min(err_msg.find(dir) for dir in pkg_share_dir) assert pkg_share_idx != -1 + # source directory to be used for installation in editable mode + dev_dir_tmp = os.path.dirname(__file__) + dev_dir_tmp = os.path.split(dev_dir_tmp)[0] # Go down one level + dev_dir_tmp = os.path.split(dev_dir_tmp)[0] # Go down another level + dev_dir_tmp = os.path.split(dev_dir_tmp)[0] # Go down another level + dev_dir = os.path.join(dev_dir_tmp, "config") # Add config + dev_idx = err_msg.find(dev_dir) + assert dev_idx != -1 + # Check the order of the various directories, which depends on # whether we are in a virtualenv or not: if inside_venv: # When inside a virtual environment, the 'share' directory of # that environment takes precedence over the user's home # directory - assert cwd_idx < share_idx < home_idx < pkg_share_idx + assert cwd_idx < share_idx < home_idx < pkg_share_idx < dev_idx else: - assert cwd_idx < home_idx < share_idx < pkg_share_idx + assert cwd_idx < home_idx < share_idx < pkg_share_idx < dev_idx @pytest.mark.usefixtures("change_into_tmpdir") diff --git a/src/psyclone/tests/parse/file_info_test.py b/src/psyclone/tests/parse/file_info_test.py index ca249a6562..2f65be3eac 100644 --- a/src/psyclone/tests/parse/file_info_test.py +++ b/src/psyclone/tests/parse/file_info_test.py @@ -34,21 +34,29 @@ # Author: A. R. Porter, STFC Daresbury Laboratory. -'''Module containing tests for the FileInfo class.''' +"""Module containing tests for the FileInfo class.""" import os import pytest +from psyclone.psyir.nodes import Node from psyclone.parse import FileInfo +SOURCE_DUMMY = """\ +program main + real :: a + a = 0.0 +end program main +""" + def test_file_info_constructor(): - ''' + """ Check that a FileInfo can be constructed and that its initial properties are set correctly. - ''' - finfo = FileInfo("missing.txt") + """ + finfo: FileInfo = FileInfo("missing.txt") assert finfo._filename == "missing.txt" assert finfo._source_code is None assert finfo.filename == "missing.txt" @@ -56,46 +64,456 @@ def test_file_info_constructor(): def test_file_info_missing_file(): - ''' + """ Test FileInfo.source() raises the expected exception if the file cannot be found. - ''' + """ finfo = FileInfo("missing.txt") with pytest.raises(FileNotFoundError) as err: - _ = finfo.contents + _ = finfo.get_source_code() assert "'missing.txt'" in str(err.value) -def test_file_info_content(tmpdir): - ''' - Check that FileInfo.content() successfully reads a file and that - the results are cached. - - ''' +def test_file_info_cached_source_code(tmpdir): + """ + Check that the contents of the file have been cached + and that the cache was used if reading it a 2nd time. 
+ """ fname = os.path.join(tmpdir, "a_file.txt") - content = "Just\nA\nTest" - with open(fname, "w", encoding='utf-8') as fout: + content = "module andy\n\nend module" + with open(fname, "w", encoding="utf-8") as fout: fout.write(content) - finfo = FileInfo(fname) - input1 = finfo.contents + finfo = FileInfo(fname, use_caching=True) + input1 = finfo.get_source_code() assert input1 == content # Check that the contents have been cached. - input2 = finfo.contents + input2 = finfo.get_source_code() assert input2 is input1 + assert finfo._source_code_hash_sum is not None + assert finfo._cache_data_load is None + assert finfo._cache_data_save is None + + # Load fparser tree to start caching + finfo.get_fparser_tree() + assert finfo._cache_data_save is not None + + finfo = FileInfo(fname, use_caching=True) + input1 = finfo.get_fparser_tree() + assert finfo._cache_data_load is not None + assert finfo._cache_data_save is None + + +def test_file_info_no_cached_source_code(tmpdir): + """ + Check that the contents of the file have not been cached. + """ + fname = os.path.join(tmpdir, "a_file.txt") + content = "module andy\n\nend module" + with open(fname, "w", encoding="utf-8") as fout: + fout.write(content) + finfo = FileInfo(fname) + finfo.get_source_code() + assert finfo._source_code_hash_sum is None + assert finfo._cache_data_load is None + assert finfo._cache_data_save is None + + # Load fparser tree and check that no caching is performed. + finfo.get_fparser_tree() + assert finfo._cache_data_save is None + + finfo = FileInfo(fname) + finfo.get_fparser_tree() + assert finfo._cache_data_load is None + assert finfo._cache_data_save is None def test_file_info_decode_error(tmpdir): - ''' - Check that FileInfo.contents() handles a decoding error when reading + """ + Check that FileInfo.get_source_code() handles a decoding error when reading a file. - ''' + """ fname = os.path.join(tmpdir, "a_file.txt") # Test content includes a byte that cannot be decoded as utf-8. content = "Ju\xb5st\nA\nTest" - with open(fname, "w", encoding='latin') as fout: + with open(fname, "w", encoding="latin") as fout: fout.write(content) finfo = FileInfo(fname) # Content of file has been read with problematic byte skipped. - assert finfo.contents == "Just\nA\nTest" + assert finfo.get_source_code() == "Just\nA\nTest" + + +def test_file_info_source_fparser_psyir(tmpdir): + """ + Check that read source, fparser and psyir always result + in the same objects. + + """ + filename = os.path.join(tmpdir, "testfile_a.f90") + try: + os.remove(filename) + except FileNotFoundError: + pass + with open(filename, "w", encoding="utf-8") as fout: + fout.write(SOURCE_DUMMY) + + file_info: FileInfo = FileInfo(filename, use_caching=True) + + source_code = file_info.get_source_code(verbose=True) + source_code2 = file_info.get_source_code(verbose=True) + assert source_code is source_code2 + + fparser_tree = file_info.get_fparser_tree(verbose=True) + fparser_tree2 = file_info.get_fparser_tree(verbose=True) + assert fparser_tree is fparser_tree2 + + psyir_node = file_info.get_psyir(verbose=True) + psyir_node2 = file_info.get_psyir(verbose=True) + assert psyir_node is psyir_node2 + + # For coverage check + file_info._cache_save(verbose=True) + + +def test_file_info_load_from_cache(tmpdir): + """ + Check that the caching to file really works. 
+ + """ + filename = os.path.join(tmpdir, "testfile_b.f90") + filename_cache = os.path.join(tmpdir, "testfile_b.psyclone") + + try: + os.remove(filename) + except FileNotFoundError: + pass + + try: + os.remove(filename_cache) + except FileNotFoundError: + pass + + with open(filename, "w", encoding="utf-8") as fout: + fout.write(SOURCE_DUMMY) + + file_info: FileInfo = FileInfo(filename, use_caching=True) + + # Call save_cache just for code coverage + file_info._cache_save(verbose=True) + file_info._cache_load(verbose=True) + + # No cache exists + assert file_info._cache_data_load is None + assert file_info._cache_data_save is None + + # Load file which triggers storing things to cache + psyir_node = file_info.get_psyir(verbose=True) + assert isinstance(psyir_node, Node) + + # Save cache is used + assert file_info._cache_data_load is None + assert file_info._cache_data_save is not None + + # Load new file + file_info: FileInfo = FileInfo(filename, use_caching=True) + + # No cache used + assert file_info._cache_data_load is None + assert file_info._cache_data_save is None + + # Load file which triggers storing things to cache + psyir_node = file_info.get_psyir(verbose=True) + assert isinstance(psyir_node, Node) + + # Loaded and saved to cache + assert file_info._cache_data_load is not None + assert file_info._cache_data_save is None + + +def test_file_info_load_from_cache_corrupted(tmpdir): + """ + Test handling of corrupt cache file + + """ + filename = os.path.join(tmpdir, "testfile_c.f90") + filename_cache = os.path.join(tmpdir, "testfile_c.psyclone") + + try: + os.remove(filename) + except FileNotFoundError: + pass + + try: + os.remove(filename_cache) + except FileNotFoundError: + pass + + with open(filename, "w", encoding="utf-8") as fout: + fout.write(SOURCE_DUMMY) + + filename_cache = os.path.join(tmpdir, "testfile_c.psycache") + with open(filename_cache, "w", encoding="utf-8") as fout: + fout.write("GARBAGE") + + # + # Step 1) Load with corrupted cache file + # + + file_info: FileInfo = FileInfo(filename, use_caching=True) + + # No cache exists + assert file_info._cache_data_load is None + assert file_info._cache_data_save is None + + # Load with damaged cache file + psyir_node = file_info.get_psyir(verbose=True) + assert isinstance(psyir_node, Node) + + # No cache exists + assert file_info._cache_data_load is None + assert file_info._cache_data_save is not None + + # + # Step 2) Cache file should now be restored + # + + file_info: FileInfo = FileInfo(filename, use_caching=True) + + # No cache exists + assert file_info._cache_data_load is None + assert file_info._cache_data_save is None + + # Load with damaged cache file + psyir_node = file_info.get_psyir(verbose=True) + assert isinstance(psyir_node, Node) + + # Cache was loaded + assert file_info._cache_data_load is not None + + # Cache not updated + assert file_info._cache_data_save is None + + +def test_file_info_source_changed(tmpdir): + """ + Make sure that cache is not used if source code + file changed, hence, its checksum. 
+ + """ + filename = os.path.join(tmpdir, "testfile_d.f90") + filename_cache = os.path.join(tmpdir, "testfile_d.psyclone") + + try: + os.remove(filename) + except FileNotFoundError: + pass + + try: + os.remove(filename_cache) + except FileNotFoundError: + pass + + with open(filename, "w", encoding="utf-8") as fout: + fout.write(SOURCE_DUMMY) + + # + # Step 1) Create cache file + # + + file_info: FileInfo = FileInfo(filename, use_caching=True) + psyir_node = file_info.get_psyir(verbose=True) + assert isinstance(psyir_node, Node) + + assert file_info._cache_data_load is None + assert file_info._cache_data_save is not None + + # + # Step 2) Change source code + # + + with open(filename, "w", encoding="utf-8") as fout: + fout.write(SOURCE_DUMMY + "\n! I'm a comment to change the hashsum") + + # + # Step 3) Cache file should not be used + # + + file_info: FileInfo = FileInfo(filename, use_caching=True) + + # Load, but not from cache + psyir_node = file_info.get_psyir(verbose=True) + assert isinstance(psyir_node, Node) + + # Cache was not loaded + assert file_info._cache_data_load is None + + # Cache updated + assert file_info._cache_data_save is not None + + +def test_file_info_source_with_bugs(tmpdir): + """ + Make sure that error is triggered if there's an error. + + """ + filename = os.path.join(tmpdir, "testfile_bug.f90") + filename_cache = os.path.join(tmpdir, "testfile_bug.psyclone") + + try: + os.remove(filename) + except FileNotFoundError: + pass + + try: + os.remove(filename_cache) + except FileNotFoundError: + pass + + with open(filename, "w", encoding="utf-8") as fout: + fout.write(SOURCE_DUMMY + "arbitrary words to trigger error") + + file_info: FileInfo = FileInfo(filename, use_caching=True) + + from psyclone.parse.file_info import FileInfoFParserError + + with pytest.raises(FileInfoFParserError) as einfo: + file_info.get_psyir(verbose=True) + + assert ( + "FileInfoFParserError: Failed to create fparser tree: at line 5" + in (str(einfo.value)) + ) + + # Call it a 2nd time for coverage of not attempting to create it a 2nd time + with pytest.raises(FileInfoFParserError) as einfo: + file_info.get_psyir(verbose=True) + + +def test_file_info_cachefile_not_accessible(tmpdir): + """ + Check if cachefile is not accessible + + """ + filename = os.path.join(tmpdir, "testfile_e.f90") + + try: + os.remove(filename) + except FileNotFoundError: + pass + with open(filename, "w", encoding="utf-8") as fout: + fout.write(SOURCE_DUMMY) + + file_info: FileInfo = FileInfo(filename, use_caching=True) + + # Set buggy cache file + file_info._filepath_cache = "/I_DONT_EXIST/FILE/cache.psycache" + + source_code = file_info.get_source_code(verbose=True) + assert source_code == SOURCE_DUMMY + + psyir_node = file_info.get_psyir(verbose=True) + assert isinstance(psyir_node, Node) + + +def test_file_info_cachefile_pickle_load_exception(tmpdir, monkeypatch): + """ + Check pickle exceptions work + + """ + filename = os.path.join(tmpdir, "testfile_f.f90") + with open(filename, "w", encoding="utf-8") as fout: + fout.write(SOURCE_DUMMY) + + file_info: FileInfo = FileInfo(filename, use_caching=True) + + psyir_node = file_info.get_psyir(verbose=True) + assert isinstance(psyir_node, Node) + + assert file_info._cache_data_load is None + assert file_info._cache_data_save is not None + + +def test_file_info_cachefile_pickle_dump_exception(tmpdir, monkeypatch): + """ + Check pickle exceptions work + + """ + filename = os.path.join(tmpdir, "testfile_f.f90") + with open(filename, "w", encoding="utf-8") as fout: + 
fout.write(SOURCE_DUMMY) + + file_info: FileInfo = FileInfo(filename, use_caching=True) + + def fun_exception(a, b): + raise Exception() + + monkeypatch.setattr("pickle.dump", fun_exception) + + psyir_node = file_info.get_psyir(verbose=True) + assert isinstance(psyir_node, Node) + + assert file_info._cache_data_load is None + assert file_info._cache_data_save is None + + +def test_file_info_source_psyir_test(tmpdir): + """ + Here, we generate a dummy psyir cached node to load it. + This is not yet possible (TODO #2786), but the code + for this already exists and is tested here. + + """ + filename = os.path.join(tmpdir, "testfile_g.f90") + filename_cache = os.path.join(tmpdir, "testfile_g.psycache") + + try: + os.remove(filename) + except FileNotFoundError: + pass + + try: + os.remove(filename_cache) + except FileNotFoundError: + pass + + with open(filename, "w", encoding="utf-8") as fout: + fout.write(SOURCE_DUMMY) + + # Create cache + file_info: FileInfo = FileInfo(filename, use_caching=True) + file_info.get_psyir() + + # Load again for coverage case + file_info.get_psyir() + + # Load from cache + file_info: FileInfo = FileInfo(filename, use_caching=True) + + fparser_tree = file_info.get_fparser_tree(verbose=True) + fparser_tree2 = file_info.get_fparser_tree(verbose=True) + assert fparser_tree is fparser_tree2 + + psyir_node = Node("dummy") + file_info._cache_data_load._psyir_node = psyir_node + + psyir_node2 = file_info.get_psyir(verbose=True) + assert psyir_node is psyir_node2 + + +def test_fparser_error(): + """ + Test that fparser raises an FileInfoFParserError + error if a file was not found + """ + file_info = FileInfo(filepath="dummy") + + # Catch special exception + from psyclone.parse.file_info import FileInfoFParserError + + with pytest.raises(FileInfoFParserError) as einfo: + file_info.get_fparser_tree() + + assert "FileInfo: No such file or directory 'dummy'" in str( + einfo.value + ) diff --git a/src/psyclone/tests/parse/module_info_test.py b/src/psyclone/tests/parse/module_info_test.py index 0a47a08988..c64419f1c7 100644 --- a/src/psyclone/tests/parse/module_info_test.py +++ b/src/psyclone/tests/parse/module_info_test.py @@ -48,19 +48,35 @@ from psyclone.tests.utilities import get_base_path -# ----------------------------------------------------------------------------- -@pytest.mark.usefixtures("change_into_tmpdir", "clear_module_manager_instance", +SOURCE_DUMMY = """\ +program main + real :: a + a = 0.0 +end program main +""" + + +@pytest.mark.usefixtures("change_into_tmpdir", + "clear_module_manager_instance", "mod_man_test_setup_directories") def test_module_info(): - '''Tests the module info object.''' + '''Tests the basic functionality of the module info object: + - Obtaining a `ModuleInfo` from a `FileInfo`. + - Obtaining the source code. + - Obtaining the PSyIR. 
+ ''' mod_info = ModuleInfo("a_mod", FileInfo("file_for_a")) assert mod_info.filename == "file_for_a" assert mod_info.name == "a_mod" - with pytest.raises(ModuleInfoError) as err: - mod_info.get_parse_tree() - assert ("Could not find file 'file_for_a' when trying to read source " - "code for module 'a_mod'" in str(err.value)) + with pytest.raises(ModuleInfoError) as einfo: + mod_info.get_fparser_tree() + + assert ("ModuleInfoError: Error(s) getting fparser tree of file" + " 'file_for_a' for module 'a_mod'" in str(einfo.value)) + + assert ("FileInfoFParserError: File 'file_for_a' not found:" + in str(einfo.value)) # Try to read the file a_mod.f90, which is contained in the d1 directory mod_man = ModuleManager.get() @@ -77,10 +93,10 @@ def test_module_info(): assert "end module a_mod" in source_code # Now access the parse tree: - assert mod_info._parse_tree is None - parse_tree = mod_info.get_parse_tree() - assert mod_info._parse_tree is parse_tree - assert isinstance(mod_info._parse_tree, Fortran2003.Program) + assert mod_info._file_info._fparser_tree is None + parse_tree = mod_info.get_fparser_tree() + assert mod_info._file_info._fparser_tree is parse_tree + assert isinstance(mod_info._file_info._fparser_tree, Fortran2003.Program) # ----------------------------------------------------------------------------- @@ -118,7 +134,7 @@ def test_module_info_get_psyir(tmpdir, monkeypatch, capsys): broken = 2 end function broken end module my_mod''') - mod_info = ModuleInfo("my_mod", FileInfo(filepath)) + mod_info: ModuleInfo = ModuleInfo("my_mod", FileInfo(filepath)) psyir = mod_info.get_psyir() assert psyir is None out, _ = capsys.readouterr() @@ -126,8 +142,8 @@ def test_module_info_get_psyir(tmpdir, monkeypatch, capsys): # Check that we handle the case where get_parse_tree() returns None. # The simplest way to do this is to monkeypatch. - mod_info._psyir = None - monkeypatch.setattr(mod_info, "get_parse_tree", lambda: None) + mod_info._psyir_container_node = None + monkeypatch.setattr(mod_info, "get_fparser_tree", lambda: None) assert mod_info.get_psyir() is None @@ -156,7 +172,7 @@ def test_mod_info_get_psyir_wrong_file(tmpdir, capsys): # Break the PSyIR so that, while it is valid, it does not contain the named # module. 
mod_info = ModuleInfo("my_mod", FileInfo(filepath)) - mod_info._psyir = Container("other_mod") + mod_info._psyir_container_node = Container("other_mod") assert mod_info.get_psyir() is None out, _ = capsys.readouterr() assert ("my_mod.f90' does contain module 'my_mod' but PSyclone is unable " @@ -181,13 +197,13 @@ def test_mod_info_get_used_modules(): mod_man.add_search_path("d1") mod_man.add_search_path("d2") - assert mod_man.get_module_info("a_mod").get_used_modules() == set() - assert mod_man.get_module_info("b_mod").get_used_modules() == set() + assert mod_man.get_module_info("a_mod").get_used_modules() == list() + assert mod_man.get_module_info("b_mod").get_used_modules() == list() - mod_c_info = mod_man.get_module_info("c_mod") + mod_c_info: ModuleInfo = mod_man.get_module_info("c_mod") assert mod_c_info.name == "c_mod" dep = mod_c_info.get_used_modules() - assert dep == set(("a_mod", "b_mod")) + assert dep == ["a_mod", "b_mod"] dep_cached = mod_c_info.get_used_modules() # Calling the method a second time should return the same @@ -233,7 +249,7 @@ def test_mod_info_get_used_symbols_from_modules(): mod_man.add_search_path("d2") mod_info = mod_man.get_module_info("c_mod") - assert mod_info._used_symbols_from_module is None + assert mod_info._map_module_name_to_used_symbols is None used_symbols = mod_info.get_used_symbols_from_modules() assert used_symbols["a_mod"] == {"a_mod_symbol"} assert used_symbols["b_mod"] == {"b_mod_symbol"} @@ -249,17 +265,18 @@ def test_mod_info_get_psyir(capsys, tmpdir): '''This tests the handling of PSyIR representation of the module. ''' - mod_man = ModuleManager.get() + mod_man: ModuleManager = ModuleManager.get() dyn_path = get_base_path("lfric") mod_man.add_search_path(f"{dyn_path}/driver_creation", recursive=False) - mod_info = mod_man.get_module_info("testkern_import_symbols_mod") - assert mod_info._psyir is None - psyir = mod_info.get_psyir() + mod_info: ModuleInfo = mod_man.get_module_info( + "testkern_import_symbols_mod") + assert mod_info._psyir_container_node is None + psyir: Container = mod_info.get_psyir() assert isinstance(psyir, Container) assert psyir.name == "testkern_import_symbols_mod" # Make sure the PSyIR is cached: - assert mod_info._psyir.children[0] is psyir + assert mod_info._psyir_container_node.children[0] is psyir # Test that we get the cached value (and not a new instance) psyir_cached = mod_info.get_psyir() assert psyir_cached is psyir @@ -318,20 +335,27 @@ def test_module_info_extract_import_information_error(): `d2/error_mod.f90`, which is invalid Fortran. ''' - # TODO 2120: Once proper error handling is implemented, this should - # likely just raise an exception. 
+
     mod_man = ModuleManager.get()
     mod_man.add_search_path("d2")

-    mod_info = mod_man.get_module_info("error_mod")
+    mod_info: ModuleInfo = mod_man.get_module_info("error_mod")
     assert mod_info.name == "error_mod"
-    assert mod_info._used_modules is None
-    assert mod_info._used_symbols_from_module is None
-    mod_info._extract_import_information()
+    assert mod_info._used_module_names is None
+    assert mod_info._map_module_name_to_used_symbols is None
+
+    with pytest.raises(ModuleInfoError) as einfo:
+        mod_info._extract_import_information()
+
+    assert ("ModuleInfoError: Error(s) getting fparser tree of file"
+            " 'd2/error_mod.F90' for module 'error_mod':\n"
+            "FileInfoFParserError: Failed to create fparser tree: at line 4"
+            in str(einfo.value))
+
     # Make sure the internal attributes are set to not None to avoid
     # trying to parse them again later
-    assert mod_info._used_modules == set()
-    assert mod_info._used_symbols_from_module == {}
+    assert mod_info._used_module_names == list()
+    assert mod_info._map_module_name_to_used_symbols == {}


 # -----------------------------------------------------------------------------
@@ -347,11 +371,153 @@ def test_module_info_get_symbol(tmpdir, monkeypatch):
         end function myfunc1
     end module my_mod''')
-    minfo = ModuleInfo("my_mod", FileInfo(filepath))
-    assert isinstance(minfo.get_symbol("myfunc1"), RoutineSymbol)
+    module_info: ModuleInfo = ModuleInfo("my_mod", FileInfo(filepath))
+    assert isinstance(module_info.get_symbol("myfunc1"), RoutineSymbol)
     # A Symbol that doesn't exist.
-    assert minfo.get_symbol("amos") is None
-    # When no Container has been created. Monkeypatch get_psyir() to simplify
-    # this.
-    monkeypatch.setattr(minfo, "get_psyir", lambda: None)
-    assert minfo.get_symbol("amos") is None
+    assert module_info.get_symbol("amos") is None
+    # When no Container has been created. Monkeypatch
+    # get_psyir() to simplify this.
+
+    def raise_error():
+        from psyclone.parse.file_info import FileInfoFParserError
+        raise FileInfoFParserError("Dummy error")
+
+    monkeypatch.setattr(
+        module_info,
+        "get_psyir",
+        raise_error)
+    assert module_info.get_symbol("amos") is None
+
+
+def test_module_info_viewtree(tmpdir):
+    """
+    Coverage test:
+    - Set up ModuleInfo from FileInfo(filename)
+    - Directly call `view_tree()`
+    """
+
+    filename = os.path.join(tmpdir, "testfile_module_info_coverage.f90")
+
+    #
+    # Write the dummy source code to a file
+    #
+    with open(filename, "w", encoding='utf-8') as fout:
+        fout.write(SOURCE_DUMMY)
+
+    # Create a dummy ModuleInfo for this file
+    module_info: ModuleInfo = ModuleInfo(
+        "my_mod",
+        FileInfo(filename)
+    )
+
+    output = module_info.view_tree()
+    assert """\
+- name: 'my_mod'
+- used_module_names: []
+""" == output
+
+
+def test_module_info_get_source_code_missing_file():
+    """
+    Coverage test:
+    - Try to read from a source file that doesn't exist
+    - Check that the expected exception is raised
+    """
+
+    module_info: ModuleInfo = ModuleInfo(
+        "my_mod",
+        FileInfo("/tmp/source_not_found.f90")
+    )
+
+    with pytest.raises(ModuleInfoError) as einfo:
+        module_info.get_source_code()
+
+    assert "Could not find file" in str(einfo.value)
+
+
+def test_module_info_coverage_fparser_error(tmpdir):
+    """
+    Coverage test:
+    - Create an .f90 file with invalid syntax
+    - Check that an exception is raised when creating the fparser tree.
+ """ + + filename = os.path.join(tmpdir, "testfile_module_info_a.f90") + + with open(filename, "w", encoding='utf-8') as fout: + fout.write(SOURCE_DUMMY) + + module_info: ModuleInfo = ModuleInfo( + "my_mod", FileInfo(filename)) + + module_info.get_fparser_tree() + + # + # Create error in source code + # + with open(filename, "w", encoding='utf-8') as fout: + fout.write(SOURCE_DUMMY+"\ncreate some error") + + module_info: ModuleInfo = ModuleInfo( + "my_mod", FileInfo(filename)) + + with pytest.raises(ModuleInfoError) as einfo: + module_info.get_fparser_tree() + + assert ("ModuleInfoError: Error(s) getting fparser tree of file" + in str(einfo.value)) + + with pytest.raises(ModuleInfoError) as einfo: + module_info.get_fparser_tree() + + assert ("Failed to create fparser tree " + "(previous attempt failed)" in + str(einfo.value)) + + +def test_minfo_get_fparser_tree_missing_file(): + """ + Coverage test: + - Test for raised Exception if file was not found + """ + + module_info: ModuleInfo = ModuleInfo( + "my_mod", FileInfo("/I_dont_exist/psyclone/asdf")) + + with pytest.raises(ModuleInfoError) as einfo: + module_info.get_fparser_tree() + + assert ("FileInfoFParserError: File '/I_dont_exist/psyclone/asdf'" + " not found:" in str(einfo.value)) + + +def test_minfo_type_errors(): + """ + Trigger type errors in constructor of module info + """ + + with pytest.raises(TypeError) as einfo: + ModuleInfo(None, None) + + assert ("Expected type 'str' for argument" + " 'module_name'" in str(einfo.value)) + + with pytest.raises(TypeError) as einfo: + ModuleInfo("foo", None) + + assert ("Expected type 'FileInfo' for argument" + " 'file_info'" in str(einfo.value)) + + +def test_empty_container(): + """ + Test covers the case that `None` was returned as a container. + """ + + file_info = FileInfo("dummy") + module_info = ModuleInfo("dummy", file_info) + + module_info.get_psyir = lambda: None + + retval = module_info.get_symbol("dummy") + assert retval is None diff --git a/src/psyclone/tests/parse/module_manager_test.py b/src/psyclone/tests/parse/module_manager_test.py index ef759c7064..72d9fa9c60 100644 --- a/src/psyclone/tests/parse/module_manager_test.py +++ b/src/psyclone/tests/parse/module_manager_test.py @@ -37,11 +37,10 @@ '''Module containing py.test tests for the ModuleManager.''' import os - import pytest from psyclone.errors import InternalError -from psyclone.parse import ModuleManager +from psyclone.parse import ModuleInfo, ModuleManager # ---------------------------------------------------------------------------- @@ -117,7 +116,7 @@ def test_mod_manager_precedence_preprocessed(): mod_man = ModuleManager.get() mod_man.add_search_path("d1") - mod_info = mod_man.get_module_info("a_mod") + mod_info: ModuleInfo = mod_man.get_module_info("a_mod") # Make sure we get the lower case filename: assert mod_info.filename == "d1/a_mod.f90" diff --git a/src/psyclone/tests/psyir/tools/call_tree_utils_test.py b/src/psyclone/tests/psyir/tools/call_tree_utils_test.py index 133668044f..397b198a0c 100644 --- a/src/psyclone/tests/psyir/tools/call_tree_utils_test.py +++ b/src/psyclone/tests/psyir/tools/call_tree_utils_test.py @@ -72,59 +72,61 @@ def test_call_tree_compute_all_non_locals_non_kernel(): ctu = CallTreeUtils() + container_node = mod_info.get_psyir() + # Check that using a local variable is not reported: - psyir = mod_info.get_psyir().find_routine_psyir("local_var_sub") + psyir = container_node.find_routine_psyir("local_var_sub") info = ctu._compute_all_non_locals(psyir) assert info == [] # Check using a 
-    psyir = mod_info.get_psyir().find_routine_psyir("module_var_sub")
+    psyir = container_node.find_routine_psyir("module_var_sub")
     info = ctu._compute_all_non_locals(psyir)
     assert info == [('reference', 'module_call_tree_mod',
                      Signature("module_var"))]

     # Test that a call of a function in the same module is reported as
     # module routine:
-    psyir = mod_info.get_psyir().find_routine_psyir("call_local_function")
+    psyir = container_node.find_routine_psyir("call_local_function")
     info = ctu._compute_all_non_locals(psyir)
     assert info == [('routine', 'module_call_tree_mod',
                      Signature("module_function"))]

     # Check using a local constant
-    psyir = mod_info.get_psyir().find_routine_psyir("local_const_sub")
+    psyir = container_node.find_routine_psyir("local_const_sub")
     info = ctu._compute_all_non_locals(psyir)
     assert info == []

     # Check using an argument
-    psyir = mod_info.get_psyir().find_routine_psyir("argument_sub")
+    psyir = container_node.find_routine_psyir("argument_sub")
     info = ctu._compute_all_non_locals(psyir)
     assert info == []

     # Check assigning the result to a function
-    psyir = mod_info.get_psyir().find_routine_psyir("module_function")
+    psyir = container_node.find_routine_psyir("module_function")
     info = ctu._compute_all_non_locals(psyir)
     assert info == []

     # Check calling an undeclared function
     psyir = \
-        mod_info.get_psyir().find_routine_psyir("calling_unknown_subroutine")
+        container_node.find_routine_psyir("calling_unknown_subroutine")
     info = ctu._compute_all_non_locals(psyir)
     assert info == [("routine", None, Signature("unknown_subroutine"))]

     # Check calling an imported subroutine
     psyir = \
-        mod_info.get_psyir().find_routine_psyir("calling_imported_subroutine")
+        container_node.find_routine_psyir("calling_imported_subroutine")
     info = ctu._compute_all_non_locals(psyir)
     assert info == [("routine", "some_module",
                      Signature("module_subroutine"))]

     # Check using an imported symbol
-    psyir = mod_info.get_psyir().find_routine_psyir("use_imported_symbol")
+    psyir = container_node.find_routine_psyir("use_imported_symbol")
     info = ctu._compute_all_non_locals(psyir)
     assert info == [("unknown", "some_module1", Signature("module_var1")),
                     ("unknown", "some_module2", Signature("module_var2"))]

     # Check calling an undeclared function
-    psyir = mod_info.get_psyir().find_routine_psyir("intrinsic_call")
+    psyir = container_node.find_routine_psyir("intrinsic_call")
     info = ctu._compute_all_non_locals(psyir)
     assert info == []

@@ -199,8 +201,8 @@ def test_call_tree_get_used_symbols_from_modules():
     mod_man.add_search_path(test_dir)

     mod_info = mod_man.get_module_info("testkern_import_symbols_mod")
-    psyir = \
-        mod_info.get_psyir().find_routine_psyir("testkern_import_symbols_code")
+    container_node = mod_info.get_psyir()
+    psyir = container_node.find_routine_psyir("testkern_import_symbols_code")

     ctu = CallTreeUtils()
     non_locals = ctu.get_non_local_symbols(psyir)
@@ -243,7 +245,8 @@ def test_call_tree_get_used_symbols_from_modules_renamed():
     mod_man.add_search_path(test_dir)

     mod_info = mod_man.get_module_info("module_renaming_external_var_mod")
-    psyir = mod_info.get_psyir().find_routine_psyir("renaming_subroutine")
+    container_node = mod_info.get_psyir()
+    psyir = container_node.find_routine_psyir("renaming_subroutine")

     ctu = CallTreeUtils()
     non_locals = ctu.get_non_local_symbols(psyir)
@@ -502,8 +505,9 @@ def test_module_info_generic_interfaces():
     all_non_locals = []
     for routine_name in all_routines:
         all_non_locals.extend(
-            ctu.get_non_local_symbols(mod_info.get_psyir().
-                                      find_routine_psyir(routine_name)))
+            ctu.get_non_local_symbols(
+                mod_info.get_psyir().
+                find_routine_psyir(routine_name)))
     # Both functions of the generic interface use 'module_var',
     # and in addition my_func1 uses module_var_1, myfunc2 uses module_var_2
     # So three variables should be reported, i.e. module_var should only
diff --git a/utils/run_flake8.sh b/utils/run_flake8.sh
index 05f1d658d3..defa9539df 100755
--- a/utils/run_flake8.sh
+++ b/utils/run_flake8.sh
@@ -1,9 +1,9 @@
 #!/bin/bash

-SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
+FILE="$(realpath "$0")"
+SCRIPTPATH="$( cd -- "$(dirname "$FILE")" >/dev/null 2>&1 ; pwd -P )"
 cd "$SCRIPTPATH/.."

-
 #
 # Hint for vscode:
 # ===========================
@@ -47,7 +47,6 @@ if ! command -v flake8; then
     exit 0
 fi

-
 flake8 src/psyclone

 if [[ $? -ne 0 ]]; then