From e057ae08e6bf6c6c84f276a127423fb145ca5fdb Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 24 Sep 2024 16:51:46 -0500 Subject: [PATCH 1/4] [ci] [python-package] temporarily stop testing against scikit-learn nightlies, load lib_lightgbm earlier (#6654) --- .ci/conda-envs/ci-core.txt | 2 +- .ci/test-python-latest.sh | 2 +- docs/FAQ.rst | 43 +++++++++++++++++++++++++++ python-package/lightgbm/__init__.py | 2 ++ python-package/lightgbm/basic.py | 45 ++++++++++++----------------- python-package/lightgbm/libpath.py | 14 ++++++++- 6 files changed, 79 insertions(+), 29 deletions(-) diff --git a/.ci/conda-envs/ci-core.txt b/.ci/conda-envs/ci-core.txt index ef04e4df2b3a..a0763580c7f3 100644 --- a/.ci/conda-envs/ci-core.txt +++ b/.ci/conda-envs/ci-core.txt @@ -23,7 +23,7 @@ joblib>=1.3.2 matplotlib-base>=3.7.3 numpy>=1.24.4 pandas>2.0 -pyarrow>=6.0 +pyarrow-core>=6.0 python-graphviz>=0.20.3 scikit-learn>=1.3.2 scipy>=1.1 diff --git a/.ci/test-python-latest.sh b/.ci/test-python-latest.sh index 08fc8558ef3e..f98f29f2641a 100755 --- a/.ci/test-python-latest.sh +++ b/.ci/test-python-latest.sh @@ -22,7 +22,7 @@ python -m pip install \ 'numpy>=2.0.0.dev0' \ 'matplotlib>=3.10.0.dev0' \ 'pandas>=3.0.0.dev0' \ - 'scikit-learn>=1.6.dev0' \ + 'scikit-learn==1.5.*' \ 'scipy>=1.15.0.dev0' python -m pip install \ diff --git a/docs/FAQ.rst b/docs/FAQ.rst index 3917b27a183a..2cec5fee06f6 100644 --- a/docs/FAQ.rst +++ b/docs/FAQ.rst @@ -206,6 +206,49 @@ Detailed description of conflicts between multiple OpenMP instances is provided If this is not your case, then you should find conflicting OpenMP library installations on your own and leave only one of them. +17. Loading LightGBM fails like: ``cannot allocate memory in static TLS block`` +------------------------------------------------------------------------------- + +When loading LightGBM, you may encounter errors like the following. + +.. 
code-block:: console + + lib/libgomp.so.1: cannot allocate memory in static TLS block + +This most commonly happens on aarch64 Linux systems. + +``gcc``'s OpenMP library (``libgomp.so``) tries to allocate a small amount of static thread-local storage ("TLS") +when it's dynamically loaded. + +That error can happen when the loader isn't able to find a large enough block of memory. + +On aarch64 Linux, processes and loaded libraries share the same pool of static TLS, +which makes such failures more likely. See these discussions: + +* https://bugzilla.redhat.com/show_bug.cgi?id=1722181#c6 +* https://gcc.gcc.gnu.narkive.com/vOXMQqLA/failure-to-dlopen-libgomp-due-to-static-tls-data + +If you are experiencing this issue when using the ``lightgbm`` Python package, try upgrading +to at least ``v4.6.0``. + +For older versions of the Python package, or for other LightGBM APIs, this issue can +often be avoided by loading ``libgomp.so.1`` as early as possible. That can be done directly by setting environment +variable ``LD_PRELOAD``, like this: + +.. code-block:: console + + export LD_PRELOAD=/root/miniconda3/envs/test-env/lib/libgomp.so.1 + +It can also be done indirectly by changing the order that other libraries are loaded +into processes, which varies by programming language and application type.
+ +For more details, see these discussions: + +* https://github.com/microsoft/LightGBM/pull/6654#issuecomment-2352014275 +* https://github.com/microsoft/LightGBM/issues/6509 +* https://maskray.me/blog/2021-02-14-all-about-thread-local-storage +* https://bugzilla.redhat.com/show_bug.cgi?id=1722181#c6 + ------ R-package diff --git a/python-package/lightgbm/__init__.py b/python-package/lightgbm/__init__.py index 600f71284159..b679b3f665b5 100644 --- a/python-package/lightgbm/__init__.py +++ b/python-package/lightgbm/__init__.py @@ -6,6 +6,8 @@ from pathlib import Path +# .basic is intentionally loaded as early as possible, to dlopen() lib_lightgbm.{dll,dylib,so} +# and its dependencies as early as possible from .basic import Booster, Dataset, Sequence, register_logger from .callback import EarlyStopException, early_stopping, log_evaluation, record_evaluation, reset_parameter from .engine import CVBooster, cv, train diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index af4d757f480b..73c040b7da4e 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -1,6 +1,13 @@ # coding: utf-8 """Wrapper for C API of LightGBM.""" +# This import causes lib_lightgbm.{dll,dylib,so} to be loaded. +# It's intentionally done here, as early as possible, to avoid issues like +# "libgomp.so.1: cannot allocate memory in static TLS block" on aarch64 Linux. +# +# For details, see the "cannot allocate memory in static TLS block" entry in docs/FAQ.rst. 
+from .libpath import _LIB # isort: skip + import abc import ctypes import inspect @@ -37,7 +44,6 @@ pd_DataFrame, pd_Series, ) -from .libpath import find_lib_path if TYPE_CHECKING: from typing import Literal @@ -160,6 +166,12 @@ _MULTICLASS_OBJECTIVES = {"multiclass", "multiclassova", "multiclass_ova", "ova", "ovr", "softmax"} +class LightGBMError(Exception): + """Error thrown by LightGBM.""" + + pass + + def _is_zero(x: float) -> bool: return -ZERO_THRESHOLD <= x <= ZERO_THRESHOLD @@ -259,26 +271,13 @@ def _log_callback(msg: bytes) -> None: _log_native(str(msg.decode("utf-8"))) -def _load_lib() -> ctypes.CDLL: - """Load LightGBM library.""" - lib_path = find_lib_path() - lib = ctypes.cdll.LoadLibrary(lib_path[0]) - lib.LGBM_GetLastError.restype = ctypes.c_char_p +# connect the Python logger to logging in lib_lightgbm +if not environ.get("LIGHTGBM_BUILD_DOC", False): + _LIB.LGBM_GetLastError.restype = ctypes.c_char_p callback = ctypes.CFUNCTYPE(None, ctypes.c_char_p) - lib.callback = callback(_log_callback) # type: ignore[attr-defined] - if lib.LGBM_RegisterLogCallback(lib.callback) != 0: - raise LightGBMError(lib.LGBM_GetLastError().decode("utf-8")) - return lib - - -# we don't need lib_lightgbm while building docs -_LIB: ctypes.CDLL -if environ.get("LIGHTGBM_BUILD_DOC", False): - from unittest.mock import Mock # isort: skip - - _LIB = Mock(ctypes.CDLL) # type: ignore -else: - _LIB = _load_lib() + _LIB.callback = callback(_log_callback) # type: ignore[attr-defined] + if _LIB.LGBM_RegisterLogCallback(_LIB.callback) != 0: + raise LightGBMError(_LIB.LGBM_GetLastError().decode("utf-8")) _NUMERIC_TYPES = (int, float, bool) @@ -552,12 +551,6 @@ def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None: self.path.unlink() -class LightGBMError(Exception): - """Error thrown by LightGBM.""" - - pass - - # DeprecationWarning is not shown by default, so let's create our own with higher level # ref: 
https://peps.python.org/pep-0565/#additional-use-case-for-futurewarning class LGBMDeprecationWarning(FutureWarning): diff --git a/python-package/lightgbm/libpath.py b/python-package/lightgbm/libpath.py index a55e7362ab44..0e6b8425dccb 100644 --- a/python-package/lightgbm/libpath.py +++ b/python-package/lightgbm/libpath.py @@ -1,6 +1,8 @@ # coding: utf-8 """Find the path to LightGBM dynamic library files.""" +import ctypes +from os import environ from pathlib import Path from platform import system from typing import List @@ -8,7 +10,7 @@ __all__: List[str] = [] -def find_lib_path() -> List[str]: +def _find_lib_path() -> List[str]: """Find the path to LightGBM library files. Returns @@ -35,3 +37,13 @@ def find_lib_path() -> List[str]: dll_path_joined = "\n".join(map(str, dll_path)) raise Exception(f"Cannot find lightgbm library file in following paths:\n{dll_path_joined}") return lib_path + + +# we don't need lib_lightgbm while building docs +_LIB: ctypes.CDLL +if environ.get("LIGHTGBM_BUILD_DOC", False): + from unittest.mock import Mock # isort: skip + + _LIB = Mock(ctypes.CDLL) # type: ignore +else: + _LIB = ctypes.cdll.LoadLibrary(_find_lib_path()[0]) From 17b739cebba5f7542477861aa5683e3011a70534 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Wed, 25 Sep 2024 18:21:20 +0300 Subject: [PATCH 2/4] [cmake] remove unused variable (#6647) Co-authored-by: James Lamb --- CMakeLists.txt | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 048818ff1c1b..c8e367f3e1a6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -376,17 +376,6 @@ if(WIN32) endif() if(MSVC) - set( - variables - CMAKE_C_FLAGS_DEBUG - CMAKE_C_FLAGS_MINSIZEREL - CMAKE_C_FLAGS_RELEASE - CMAKE_C_FLAGS_RELWITHDEBINFO - CMAKE_CXX_FLAGS_DEBUG - CMAKE_CXX_FLAGS_MINSIZEREL - CMAKE_CXX_FLAGS_RELEASE - CMAKE_CXX_FLAGS_RELWITHDEBINFO - ) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4 /MP") if(__BUILD_FOR_R) # MSVC does not like this commit: From 
696b904874a6c91f0390401bdb1085e3be55a274 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 25 Sep 2024 10:21:37 -0500 Subject: [PATCH 3/4] [ci] [R-package] re-enable R-devel clang16 job (fixes #6607) (#6642) --- .github/workflows/r_package.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/r_package.yml b/.github/workflows/r_package.yml index fd7b83187170..1758583ad8e4 100644 --- a/.github/workflows/r_package.yml +++ b/.github/workflows/r_package.yml @@ -268,9 +268,7 @@ jobs: # * CRAN "additional checks": https://cran.r-project.org/web/checks/check_issue_kinds.html # * images: https://r-hub.github.io/containers/containers.html image: - # clang16 should be re-enabled once it's fixed upstream - # ref: https://github.com/microsoft/LightGBM/issues/6607 - #- clang16 + - clang16 - clang17 - clang18 - clang19 From 59a3432b9d26290fcf25ba12b82feedd05384832 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Thu, 26 Sep 2024 05:39:45 +0300 Subject: [PATCH 4/4] [cmake] simplify SWIG config (#6648) Co-authored-by: James Lamb --- CMakeLists.txt | 114 ++++++++++++++++--------------------------------- 1 file changed, 36 insertions(+), 78 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c8e367f3e1a6..4df470844434 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -59,10 +59,10 @@ if(__INTEGRATE_OPENCL) message(STATUS "Building library with integrated OpenCL components") endif() -if(__BUILD_FOR_PYTHON OR __BUILD_FOR_R) - # the Python and R package don't require the CLI +if(__BUILD_FOR_PYTHON OR __BUILD_FOR_R OR USE_SWIG) + # the SWIG wrapper, the Python and R package don't require the CLI set(BUILD_CLI OFF) - # installing the R and Python package shouldn't place LightGBM's headers + # installing the SWIG wrapper, the R and Python package shouldn't place LightGBM's headers # outside of where the package is installed set(INSTALL_HEADERS OFF) endif() @@ -104,15 +104,16 @@ if(USE_SWIG) include_directories(JNI_INCLUDE_DIRS) 
include_directories($ENV{JAVA_HOME}/include) if(WIN32) - file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/com/microsoft/ml/lightgbm/windows/x86_64") + set(LGBM_SWIG_DESTINATION_DIR "${CMAKE_CURRENT_BINARY_DIR}/com/microsoft/ml/lightgbm/windows/x86_64") include_directories($ENV{JAVA_HOME}/include/win32) elseif(APPLE) - file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/com/microsoft/ml/lightgbm/osx/x86_64") + set(LGBM_SWIG_DESTINATION_DIR "${CMAKE_CURRENT_BINARY_DIR}/com/microsoft/ml/lightgbm/osx/x86_64") include_directories($ENV{JAVA_HOME}/include/darwin) else() - file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/com/microsoft/ml/lightgbm/linux/x86_64") + set(LGBM_SWIG_DESTINATION_DIR "${CMAKE_CURRENT_BINARY_DIR}/com/microsoft/ml/lightgbm/linux/x86_64") include_directories($ENV{JAVA_HOME}/include/linux) endif() + file(MAKE_DIRECTORY "${LGBM_SWIG_DESTINATION_DIR}") endif() set(EIGEN_DIR "${PROJECT_SOURCE_DIR}/external_libs/eigen") @@ -542,84 +543,41 @@ if(USE_SWIG) OUTPUT_NAME "lib_lightgbm_swig" ) if(WIN32) + set(LGBM_SWIG_LIB_DESTINATION_PATH "${LGBM_SWIG_DESTINATION_DIR}/lib_lightgbm_swig.dll") if(MINGW OR CYGWIN) - add_custom_command( - TARGET _lightgbm_swig - POST_BUILD - COMMAND "${Java_JAVAC_EXECUTABLE}" -d . java/*.java - COMMAND - "${CMAKE_COMMAND}" - -E - copy_if_different - "${PROJECT_SOURCE_DIR}/lib_lightgbm.dll" - com/microsoft/ml/lightgbm/windows/x86_64 - COMMAND - "${CMAKE_COMMAND}" - -E - copy_if_different - "${PROJECT_SOURCE_DIR}/lib_lightgbm_swig.dll" - com/microsoft/ml/lightgbm/windows/x86_64 - COMMAND "${Java_JAR_EXECUTABLE}" -cf lightgbmlib.jar com - ) + set(LGBM_LIB_SOURCE_PATH "${PROJECT_SOURCE_DIR}/lib_lightgbm.dll") + set(LGBM_SWIG_LIB_SOURCE_PATH "${PROJECT_SOURCE_DIR}/lib_lightgbm_swig.dll") else() - add_custom_command( - TARGET _lightgbm_swig - POST_BUILD - COMMAND "${Java_JAVAC_EXECUTABLE}" -d . 
java/*.java - COMMAND - "${CMAKE_COMMAND}" - -E - copy_if_different - "${PROJECT_SOURCE_DIR}/Release/lib_lightgbm.dll" - com/microsoft/ml/lightgbm/windows/x86_64 - COMMAND - "${CMAKE_COMMAND}" - -E - copy_if_different - "${PROJECT_SOURCE_DIR}/Release/lib_lightgbm_swig.dll" - com/microsoft/ml/lightgbm/windows/x86_64 - COMMAND "${Java_JAR_EXECUTABLE}" -cf lightgbmlib.jar com - ) + set(LGBM_LIB_SOURCE_PATH "${PROJECT_SOURCE_DIR}/Release/lib_lightgbm.dll") + set(LGBM_SWIG_LIB_SOURCE_PATH "${PROJECT_SOURCE_DIR}/Release/lib_lightgbm_swig.dll") endif() elseif(APPLE) - add_custom_command( - TARGET _lightgbm_swig - POST_BUILD - COMMAND "${Java_JAVAC_EXECUTABLE}" -d . java/*.java - COMMAND - "${CMAKE_COMMAND}" - -E - copy_if_different - "${PROJECT_SOURCE_DIR}/lib_lightgbm.dylib" - com/microsoft/ml/lightgbm/osx/x86_64 - COMMAND - "${CMAKE_COMMAND}" - -E - copy_if_different - "${PROJECT_SOURCE_DIR}/lib_lightgbm_swig.jnilib" - com/microsoft/ml/lightgbm/osx/x86_64/lib_lightgbm_swig.dylib - COMMAND "${Java_JAR_EXECUTABLE}" -cf lightgbmlib.jar com - ) + set(LGBM_LIB_SOURCE_PATH "${PROJECT_SOURCE_DIR}/lib_lightgbm.dylib") + set(LGBM_SWIG_LIB_SOURCE_PATH "${PROJECT_SOURCE_DIR}/lib_lightgbm_swig.jnilib") + set(LGBM_SWIG_LIB_DESTINATION_PATH "${LGBM_SWIG_DESTINATION_DIR}/lib_lightgbm_swig.dylib") else() - add_custom_command( - TARGET _lightgbm_swig - POST_BUILD - COMMAND "${Java_JAVAC_EXECUTABLE}" -d . 
java/*.java - COMMAND - "${CMAKE_COMMAND}" - -E - copy_if_different - "${PROJECT_SOURCE_DIR}/lib_lightgbm.so" - com/microsoft/ml/lightgbm/linux/x86_64 - COMMAND - "${CMAKE_COMMAND}" - -E - copy_if_different - "${PROJECT_SOURCE_DIR}/lib_lightgbm_swig.so" - com/microsoft/ml/lightgbm/linux/x86_64 - COMMAND "${Java_JAR_EXECUTABLE}" -cf lightgbmlib.jar com - ) + set(LGBM_LIB_SOURCE_PATH "${PROJECT_SOURCE_DIR}/lib_lightgbm.so") + set(LGBM_SWIG_LIB_SOURCE_PATH "${PROJECT_SOURCE_DIR}/lib_lightgbm_swig.so") + set(LGBM_SWIG_LIB_DESTINATION_PATH "${LGBM_SWIG_DESTINATION_DIR}/lib_lightgbm_swig.so") endif() + add_custom_command( + TARGET _lightgbm_swig + POST_BUILD + COMMAND "${Java_JAVAC_EXECUTABLE}" -d . java/*.java + COMMAND + "${CMAKE_COMMAND}" + -E + copy_if_different + "${LGBM_LIB_SOURCE_PATH}" + "${LGBM_SWIG_DESTINATION_DIR}" + COMMAND + "${CMAKE_COMMAND}" + -E + copy_if_different + "${LGBM_SWIG_LIB_SOURCE_PATH}" + "${LGBM_SWIG_LIB_DESTINATION_PATH}" + COMMAND "${Java_JAR_EXECUTABLE}" -cf lightgbmlib.jar com + ) endif() if(USE_MPI)