Skip to content

Commit

Permalink
resolve module names in import hook (#1289)
Browse files Browse the repository at this point in the history
* resolve module names in import hook

* alternate implementation based on importlib

* refactor and re-organize

* add unit test
  • Loading branch information
kevinushey authored Oct 19, 2022
1 parent a83dbb9 commit a1d7f7f
Show file tree
Hide file tree
Showing 4 changed files with 178 additions and 63 deletions.
10 changes: 10 additions & 0 deletions R/testthat-helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,13 @@ skip_if_no_conda <- function() {
skip("conda not available for testing")

}

# Skip the calling test unless matplotlib can be used for testing.
# Applies the CRAN and Python-availability skips first, then checks
# whether the 'matplotlib' Python module is available.
skip_if_no_matplotlib <- function() {

  skip_on_cran()
  skip_if_no_python()

  has_matplotlib <- py_module_available("matplotlib")
  if (!has_matplotlib)
    skip("matplotlib not available for testing")

}
211 changes: 148 additions & 63 deletions inst/python/rpytools/loader.py
Original file line number Diff line number Diff line change
@@ -1,73 +1,158 @@

import sys
import threading

# a list of newly-imported packages
if sys.version_info.major < 3:
import __builtin__ as builtins
else:
import builtins


# The R callback to be run for each newly-imported package. Starts out as
# None and is assigned by the 'initialize()' function.
_callback = None

# Names of Python packages which have been imported but whose callback has
# not yet been run. Names are appended by '_run_hook()' and consumed by
# '_maybe_run_hooks()', which only runs the callback on the main thread.
_imported_packages = []

# A simple counter, tracking the import recursion depth. It is incremented
# while an import is in progress and decremented when it finishes. We only
# attempt to run the R callback at the top level (depth 0); that is, we don't
# want to run it while modules are being loaded recursively.
_recursion_depth = 0

# adapted from:
# https://stackoverflow.com/questions/40623889/post-import-hooks-in-python-3
def initialize(callback):
# The builtin implementation of '__import__'; saved so that we can still call
# the original after our own hook has replaced 'builtins.__import__'.
__import__ = builtins.__import__

# The implementation of '_find_and_load' captured from 'importlib._bootstrap'.
# This pokes at Python internals, so the lookup is strictly best-effort: if
# the module or attribute is unavailable, '_find_and_load' stays None and
# 'initialize()' falls back to hooking '__import__' instead.
_find_and_load = None
try:
    import importlib._bootstrap
    _find_and_load = importlib._bootstrap._find_and_load
except Exception:
    # Deliberately broad (any ordinary failure means "don't use importlib"),
    # but not a bare 'except', so KeyboardInterrupt / SystemExit still
    # propagate.
    pass


# Run hooks on imported packages, if safe to do so.
def _maybe_run_hooks():
    """Run the registered callback for every pending imported package.

    Nothing is run while an import is still in progress (recursion depth
    is non-zero) or when called from a thread other than the main thread.

    Returns:
        True if the pending packages were processed, False if the
        pre-flight checks failed and nothing was run.
    """

    # Don't run hooks while loading packages recursively.
    if _recursion_depth != 0:
        return False

    # Check whether we're on the main thread. Note that separate threads can
    # attempt to load Python modules, but the R callback we register can only
    # be safely run on the main thread.
    is_main_thread = isinstance(threading.current_thread(), threading._MainThread)
    if not is_main_thread:
        return False

    # Pre-flight checks passed; run the callbacks.
    #
    # Each package is removed from the pending list *before* its callback
    # runs. The callback executes at recursion depth 0, so any import it
    # triggers re-enters this function; taking entries off the list first
    # guarantees each package is reported exactly once and that we never
    # iterate a list that is being mutated underneath us. If a callback
    # raises, the packages not yet processed stay pending.
    while _imported_packages:
        package = _imported_packages.pop(0)
        _callback(package)

    return True


# Resolve a module name on import. See Python code here for motivation.
# https://github.com/python/cpython/blob/c5140945c723ae6c4b7ee81ff720ac8ea4b52cfd/Lib/importlib/_bootstrap.py#L1246-L1270
def _resolve_module_name(name, globals=None, level=0):
    """Return the name under which an import should be tracked.

    Args:
        name: The module name as passed to '__import__'.
        globals: The importing module's globals (a mapping), or None.
        level: The import level; 0 means an absolute import.

    Returns:
        'name' for absolute imports. For relative imports, the importing
        module's '__package__' if set, else its '__spec__.parent' if a
        spec is present, else 'name' unchanged.
    """

    # Absolute imports already carry the name we want.
    if level == 0:
        return name

    # Without the importer's globals there is nothing to resolve against;
    # hand the name back unchanged and let the real import machinery report
    # the error, rather than failing here with an AttributeError.
    if globals is None:
        return name

    package = globals.get("__package__")
    if package is not None:
        return package

    spec = globals.get("__spec__")
    if spec is not None:
        return spec.parent

    return name

# Helper function for running an import hook with our extra scaffolding.
def _run_hook(name, hook):
    """Run 'hook' (which performs the actual import) with bookkeeping.

    Args:
        name: The name of the module being imported; used to detect a
            first-time import and to record it in the pending list.
        hook: A zero-argument callable that performs the import and
            returns the loaded module.

    Returns:
        Whatever 'hook()' returns. Any exception raised by 'hook'
        propagates unchanged; in that case nothing is recorded.
    """

    # Check whether this module has already been imported.
    already_imported = name in sys.modules

    # Bump the recursion depth for the duration of the import, so that
    # nested imports do not try to run the callbacks.
    global _recursion_depth
    _recursion_depth += 1

    # Run the hook, always restoring the recursion depth afterwards.
    try:
        module = hook()
    finally:
        _recursion_depth -= 1

    # Add this package to the import list, if this is the first
    # time importing that package.
    if not already_imported:
        _imported_packages.append(name)

    # Try and run hooks if possible.
    _maybe_run_hooks()

    # Return the loaded module.
    return module


# Replacement for 'importlib._bootstrap._find_and_load'. Delegates to the
# saved original implementation, wrapped in our import bookkeeping.
def _find_and_load_hook(name, import_):
    return _run_hook(name, lambda: _find_and_load(name, import_))

# Install our replacement for 'importlib._bootstrap._find_and_load'.
def _initialize_importlib():
    import importlib._bootstrap as bootstrap
    bootstrap._find_and_load = _find_and_load_hook


# The hook installed to replace '__import__'.
def _import_hook(name, globals=None, locals=None, fromlist=(), level=0):
    """Import a module via the saved '__import__', with our bookkeeping.

    Takes the same arguments as the builtin '__import__' and returns
    whatever it returns; exceptions from the import propagate unchanged.
    """

    # Resolve the module name; this is the name used to decide whether the
    # import is new and to report it to the callback.
    resolved_module_name = _resolve_module_name(name, globals, level)

    # The actual import always uses the arguments exactly as given.
    def _hook():
        return __import__(name, globals=globals, locals=locals, fromlist=fromlist, level=level)

    return _run_hook(resolved_module_name, _hook)

# Install our replacement for the builtin '__import__'.
def _initialize_default():
    setattr(builtins, "__import__", _import_hook)


# The main entrypoint for this module: register 'callback' and install
# the import hook.
def initialize(callback):

    # Remember the callback so the hooks can invoke it later.
    global _callback
    _callback = callback

    # Prefer hooking importlib when '_find_and_load' was captured;
    # otherwise, fall back to replacing the builtin '__import__'.
    if _find_and_load is None:
        installer = _initialize_default
    else:
        installer = _initialize_importlib

    return installer()
4 changes: 4 additions & 0 deletions tests/testthat/resources/import-test.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@

# Helper script, run in a separate R process by the import hook test.
library(reticulate)
# Set the module-load logging option before any Python code is run.
options(reticulate.logModuleLoad = TRUE)
# Import a submodule via 'from ... import ...' from Python code.
reticulate::py_run_string("from matplotlib import pyplot as plt")
16 changes: 16 additions & 0 deletions tests/testthat/test-python-import-hook.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
context("imports")

test_that("The reticulate import hook handles recursive imports", {

  skip_if_no_matplotlib()

  # Run the helper script in a fresh R process, capturing stdout and stderr.
  r_binary <- file.path(R.home("bin"), "R")
  script_path <- "resources/import-test.R"
  r_args <- c("--no-save", "--no-restore", "-s", "-f", shQuote(script_path))
  captured <- system2(r_binary, r_args, stdout = TRUE, stderr = TRUE)

  # Reduce each "Loaded module '<name>'" line to the bare module name;
  # lines that do not match are left as-is.
  loaded <- gsub("Loaded module '(.*)'", "\\1", captured)
  expect_true("matplotlib.pyplot" %in% loaded)

})

0 comments on commit a1d7f7f

Please sign in to comment.