Skip to content

Commit

Permalink
Merge branch 'master' into bagging/bagging-by-query-for-lambdarank
Browse files Browse the repository at this point in the history
  • Loading branch information
shiyu1994 authored Oct 1, 2024
2 parents 0993154 + 59a3432 commit 8a9b356
Show file tree
Hide file tree
Showing 8 changed files with 116 additions and 121 deletions.
2 changes: 1 addition & 1 deletion .ci/conda-envs/ci-core.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ joblib>=1.3.2
matplotlib-base>=3.7.3
numpy>=1.24.4
pandas>2.0
pyarrow>=6.0
pyarrow-core>=6.0
python-graphviz>=0.20.3
scikit-learn>=1.3.2
scipy>=1.1
Expand Down
2 changes: 1 addition & 1 deletion .ci/test-python-latest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ python -m pip install \
'numpy>=2.0.0.dev0' \
'matplotlib>=3.10.0.dev0' \
'pandas>=3.0.0.dev0' \
'scikit-learn>=1.6.dev0' \
'scikit-learn==1.5.*' \
'scipy>=1.15.0.dev0'

python -m pip install \
Expand Down
4 changes: 1 addition & 3 deletions .github/workflows/r_package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -268,9 +268,7 @@ jobs:
# * CRAN "additional checks": https://cran.r-project.org/web/checks/check_issue_kinds.html
# * images: https://r-hub.github.io/containers/containers.html
image:
# clang16 should be re-enabled once it's fixed upstream
# ref: https://github.com/microsoft/LightGBM/issues/6607
#- clang16
- clang16
- clang17
- clang18
- clang19
Expand Down
125 changes: 36 additions & 89 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,10 @@ if(__INTEGRATE_OPENCL)
message(STATUS "Building library with integrated OpenCL components")
endif()

if(__BUILD_FOR_PYTHON OR __BUILD_FOR_R)
# the Python and R package don't require the CLI
if(__BUILD_FOR_PYTHON OR __BUILD_FOR_R OR USE_SWIG)
# the SWIG wrapper, the Python and R package don't require the CLI
set(BUILD_CLI OFF)
# installing the R and Python package shouldn't place LightGBM's headers
# installing the SWIG wrapper, the R and Python package shouldn't place LightGBM's headers
# outside of where the package is installed
set(INSTALL_HEADERS OFF)
endif()
Expand Down Expand Up @@ -104,15 +104,16 @@ if(USE_SWIG)
# Add the JNI header directories to the include path.
# The variable has to be dereferenced: passing the bare name JNI_INCLUDE_DIRS
# adds a literal (relative) directory called "JNI_INCLUDE_DIRS" instead of the
# directories stored in the variable.
# NOTE(review): assumes find_package(JNI) has already run in this if(USE_SWIG) block — confirm.
include_directories(${JNI_INCLUDE_DIRS})
include_directories($ENV{JAVA_HOME}/include)
if(WIN32)
file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/com/microsoft/ml/lightgbm/windows/x86_64")
set(LGBM_SWIG_DESTINATION_DIR "${CMAKE_CURRENT_BINARY_DIR}/com/microsoft/ml/lightgbm/windows/x86_64")
include_directories($ENV{JAVA_HOME}/include/win32)
elseif(APPLE)
file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/com/microsoft/ml/lightgbm/osx/x86_64")
set(LGBM_SWIG_DESTINATION_DIR "${CMAKE_CURRENT_BINARY_DIR}/com/microsoft/ml/lightgbm/osx/x86_64")
include_directories($ENV{JAVA_HOME}/include/darwin)
else()
file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/com/microsoft/ml/lightgbm/linux/x86_64")
set(LGBM_SWIG_DESTINATION_DIR "${CMAKE_CURRENT_BINARY_DIR}/com/microsoft/ml/lightgbm/linux/x86_64")
include_directories($ENV{JAVA_HOME}/include/linux)
endif()
file(MAKE_DIRECTORY "${LGBM_SWIG_DESTINATION_DIR}")
endif()

set(EIGEN_DIR "${PROJECT_SOURCE_DIR}/external_libs/eigen")
Expand Down Expand Up @@ -376,17 +377,6 @@ if(WIN32)
endif()

if(MSVC)
set(
variables
CMAKE_C_FLAGS_DEBUG
CMAKE_C_FLAGS_MINSIZEREL
CMAKE_C_FLAGS_RELEASE
CMAKE_C_FLAGS_RELWITHDEBINFO
CMAKE_CXX_FLAGS_DEBUG
CMAKE_CXX_FLAGS_MINSIZEREL
CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_RELWITHDEBINFO
)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4 /MP")
if(__BUILD_FOR_R)
# MSVC does not like this commit:
Expand Down Expand Up @@ -553,84 +543,41 @@ if(USE_SWIG)
OUTPUT_NAME "lib_lightgbm_swig"
)
if(WIN32)
set(LGBM_SWIG_LIB_DESTINATION_PATH "${LGBM_SWIG_DESTINATION_DIR}/lib_lightgbm_swig.dll")
if(MINGW OR CYGWIN)
add_custom_command(
TARGET _lightgbm_swig
POST_BUILD
COMMAND "${Java_JAVAC_EXECUTABLE}" -d . java/*.java
COMMAND
"${CMAKE_COMMAND}"
-E
copy_if_different
"${PROJECT_SOURCE_DIR}/lib_lightgbm.dll"
com/microsoft/ml/lightgbm/windows/x86_64
COMMAND
"${CMAKE_COMMAND}"
-E
copy_if_different
"${PROJECT_SOURCE_DIR}/lib_lightgbm_swig.dll"
com/microsoft/ml/lightgbm/windows/x86_64
COMMAND "${Java_JAR_EXECUTABLE}" -cf lightgbmlib.jar com
)
set(LGBM_LIB_SOURCE_PATH "${PROJECT_SOURCE_DIR}/lib_lightgbm.dll")
set(LGBM_SWIG_LIB_SOURCE_PATH "${PROJECT_SOURCE_DIR}/lib_lightgbm_swig.dll")
else()
add_custom_command(
TARGET _lightgbm_swig
POST_BUILD
COMMAND "${Java_JAVAC_EXECUTABLE}" -d . java/*.java
COMMAND
"${CMAKE_COMMAND}"
-E
copy_if_different
"${PROJECT_SOURCE_DIR}/Release/lib_lightgbm.dll"
com/microsoft/ml/lightgbm/windows/x86_64
COMMAND
"${CMAKE_COMMAND}"
-E
copy_if_different
"${PROJECT_SOURCE_DIR}/Release/lib_lightgbm_swig.dll"
com/microsoft/ml/lightgbm/windows/x86_64
COMMAND "${Java_JAR_EXECUTABLE}" -cf lightgbmlib.jar com
)
set(LGBM_LIB_SOURCE_PATH "${PROJECT_SOURCE_DIR}/Release/lib_lightgbm.dll")
set(LGBM_SWIG_LIB_SOURCE_PATH "${PROJECT_SOURCE_DIR}/Release/lib_lightgbm_swig.dll")
endif()
elseif(APPLE)
add_custom_command(
TARGET _lightgbm_swig
POST_BUILD
COMMAND "${Java_JAVAC_EXECUTABLE}" -d . java/*.java
COMMAND
"${CMAKE_COMMAND}"
-E
copy_if_different
"${PROJECT_SOURCE_DIR}/lib_lightgbm.dylib"
com/microsoft/ml/lightgbm/osx/x86_64
COMMAND
"${CMAKE_COMMAND}"
-E
copy_if_different
"${PROJECT_SOURCE_DIR}/lib_lightgbm_swig.jnilib"
com/microsoft/ml/lightgbm/osx/x86_64/lib_lightgbm_swig.dylib
COMMAND "${Java_JAR_EXECUTABLE}" -cf lightgbmlib.jar com
)
set(LGBM_LIB_SOURCE_PATH "${PROJECT_SOURCE_DIR}/lib_lightgbm.dylib")
set(LGBM_SWIG_LIB_SOURCE_PATH "${PROJECT_SOURCE_DIR}/lib_lightgbm_swig.jnilib")
set(LGBM_SWIG_LIB_DESTINATION_PATH "${LGBM_SWIG_DESTINATION_DIR}/lib_lightgbm_swig.dylib")
else()
add_custom_command(
TARGET _lightgbm_swig
POST_BUILD
COMMAND "${Java_JAVAC_EXECUTABLE}" -d . java/*.java
COMMAND
"${CMAKE_COMMAND}"
-E
copy_if_different
"${PROJECT_SOURCE_DIR}/lib_lightgbm.so"
com/microsoft/ml/lightgbm/linux/x86_64
COMMAND
"${CMAKE_COMMAND}"
-E
copy_if_different
"${PROJECT_SOURCE_DIR}/lib_lightgbm_swig.so"
com/microsoft/ml/lightgbm/linux/x86_64
COMMAND "${Java_JAR_EXECUTABLE}" -cf lightgbmlib.jar com
)
set(LGBM_LIB_SOURCE_PATH "${PROJECT_SOURCE_DIR}/lib_lightgbm.so")
set(LGBM_SWIG_LIB_SOURCE_PATH "${PROJECT_SOURCE_DIR}/lib_lightgbm_swig.so")
set(LGBM_SWIG_LIB_DESTINATION_PATH "${LGBM_SWIG_DESTINATION_DIR}/lib_lightgbm_swig.so")
endif()
# Post-build packaging step for the _lightgbm_swig target: compile the Java sources,
# stage both native libraries in the platform-specific destination directory,
# and bundle the result into lightgbmlib.jar.
# All relative paths below (".", "java/*.java", "com") are resolved against the
# directory the build tool runs this command in; no WORKING_DIRECTORY is set here.
add_custom_command(
TARGET _lightgbm_swig
POST_BUILD
# compile every .java file under java/ and write the class files below "."
# (the java/*.java wildcard is left for the shell to expand)
COMMAND "${Java_JAVAC_EXECUTABLE}" -d . java/*.java
# copy the main LightGBM library into the destination directory, keeping its file name
COMMAND
"${CMAKE_COMMAND}"
-E
copy_if_different
"${LGBM_LIB_SOURCE_PATH}"
"${LGBM_SWIG_DESTINATION_DIR}"
# copy the SWIG wrapper library to an explicit destination file path, which lets
# the copy rename the file when the source and destination names differ
COMMAND
"${CMAKE_COMMAND}"
-E
copy_if_different
"${LGBM_SWIG_LIB_SOURCE_PATH}"
"${LGBM_SWIG_LIB_DESTINATION_PATH}"
# create lightgbmlib.jar from everything under the "com" directory
COMMAND "${Java_JAR_EXECUTABLE}" -cf lightgbmlib.jar com
)
endif()

if(USE_MPI)
Expand Down
43 changes: 43 additions & 0 deletions docs/FAQ.rst
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,49 @@ Detailed description of conflicts between multiple OpenMP instances is provided

If this is not your case, then you should find conflicting OpenMP library installations on your own and leave only one of them.

17. Loading LightGBM fails like: ``cannot allocate memory in static TLS block``
-------------------------------------------------------------------------------

When loading LightGBM, you may encounter errors like the following.

.. code-block:: console

    lib/libgomp.so.1: cannot allocate memory in static TLS block

This most commonly happens on aarch64 Linux systems.

``gcc``'s OpenMP library (``libgomp.so``) tries to allocate a small amount of static thread-local storage ("TLS")
when it's dynamically loaded.

That error can happen when the loader isn't able to find a large enough block of memory.

On aarch64 Linux, processes and loaded libraries share the same pool of static TLS,
which makes such failures more likely. See these discussions:

* https://bugzilla.redhat.com/show_bug.cgi?id=1722181#c6
* https://gcc.gcc.gnu.narkive.com/vOXMQqLA/failure-to-dlopen-libgomp-due-to-static-tls-data

If you are experiencing this issue when using the ``lightgbm`` Python package, try upgrading
to at least ``v4.6.0``.

For older versions of the Python package, or for other LightGBM APIs, this issue can
often be avoided by loading ``libgomp.so.1`` as early as possible in the process.
That can be done directly by setting the environment variable ``LD_PRELOAD``, like this:

.. code-block:: console

    export LD_PRELOAD=/root/miniconda3/envs/test-env/lib/libgomp.so.1

It can also be done indirectly by changing the order that other libraries are loaded
into processes, which varies by programming language and application type.

For more details, see these discussions:

* https://github.com/microsoft/LightGBM/pull/6654#issuecomment-2352014275
* https://github.com/microsoft/LightGBM/issues/6509
* https://maskray.me/blog/2021-02-14-all-about-thread-local-storage
* https://bugzilla.redhat.com/show_bug.cgi?id=1722181#c6

------

R-package
Expand Down
2 changes: 2 additions & 0 deletions python-package/lightgbm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

from pathlib import Path

# .basic is intentionally loaded as early as possible, to dlopen() lib_lightgbm.{dll,dylib,so}
# and its dependencies as early as possible
from .basic import Booster, Dataset, Sequence, register_logger
from .callback import EarlyStopException, early_stopping, log_evaluation, record_evaluation, reset_parameter
from .engine import CVBooster, cv, train
Expand Down
45 changes: 19 additions & 26 deletions python-package/lightgbm/basic.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
# coding: utf-8
"""Wrapper for C API of LightGBM."""

# This import causes lib_lightgbm.{dll,dylib,so} to be loaded.
# It's intentionally done here, as early as possible, to avoid issues like
# "libgomp.so.1: cannot allocate memory in static TLS block" on aarch64 Linux.
#
# For details, see the "cannot allocate memory in static TLS block" entry in docs/FAQ.rst.
from .libpath import _LIB # isort: skip

import abc
import ctypes
import inspect
Expand Down Expand Up @@ -37,7 +44,6 @@
pd_DataFrame,
pd_Series,
)
from .libpath import find_lib_path

if TYPE_CHECKING:
from typing import Literal
Expand Down Expand Up @@ -160,6 +166,12 @@
_MULTICLASS_OBJECTIVES = {"multiclass", "multiclassova", "multiclass_ova", "ova", "ovr", "softmax"}


class LightGBMError(Exception):
"""Error thrown by LightGBM."""

pass


def _is_zero(x: float) -> bool:
return -ZERO_THRESHOLD <= x <= ZERO_THRESHOLD

Expand Down Expand Up @@ -259,26 +271,13 @@ def _log_callback(msg: bytes) -> None:
_log_native(str(msg.decode("utf-8")))


def _load_lib() -> ctypes.CDLL:
"""Load LightGBM library."""
lib_path = find_lib_path()
lib = ctypes.cdll.LoadLibrary(lib_path[0])
lib.LGBM_GetLastError.restype = ctypes.c_char_p
# connect the Python logger to logging in lib_lightgbm
if not environ.get("LIGHTGBM_BUILD_DOC", False):
_LIB.LGBM_GetLastError.restype = ctypes.c_char_p
callback = ctypes.CFUNCTYPE(None, ctypes.c_char_p)
lib.callback = callback(_log_callback) # type: ignore[attr-defined]
if lib.LGBM_RegisterLogCallback(lib.callback) != 0:
raise LightGBMError(lib.LGBM_GetLastError().decode("utf-8"))
return lib


# we don't need lib_lightgbm while building docs
_LIB: ctypes.CDLL
if environ.get("LIGHTGBM_BUILD_DOC", False):
from unittest.mock import Mock # isort: skip

_LIB = Mock(ctypes.CDLL) # type: ignore
else:
_LIB = _load_lib()
_LIB.callback = callback(_log_callback) # type: ignore[attr-defined]
if _LIB.LGBM_RegisterLogCallback(_LIB.callback) != 0:
raise LightGBMError(_LIB.LGBM_GetLastError().decode("utf-8"))


_NUMERIC_TYPES = (int, float, bool)
Expand Down Expand Up @@ -552,12 +551,6 @@ def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
self.path.unlink()


class LightGBMError(Exception):
"""Error thrown by LightGBM."""

pass


# DeprecationWarning is not shown by default, so let's create our own with higher level
# ref: https://peps.python.org/pep-0565/#additional-use-case-for-futurewarning
class LGBMDeprecationWarning(FutureWarning):
Expand Down
14 changes: 13 additions & 1 deletion python-package/lightgbm/libpath.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
# coding: utf-8
"""Find the path to LightGBM dynamic library files."""

import ctypes
from os import environ
from pathlib import Path
from platform import system
from typing import List

__all__: List[str] = []


def find_lib_path() -> List[str]:
def _find_lib_path() -> List[str]:
"""Find the path to LightGBM library files.
Returns
Expand All @@ -35,3 +37,13 @@ def find_lib_path() -> List[str]:
dll_path_joined = "\n".join(map(str, dll_path))
raise Exception(f"Cannot find lightgbm library file in following paths:\n{dll_path_joined}")
return lib_path


# we don't need lib_lightgbm while building docs
_LIB: ctypes.CDLL
if environ.get("LIGHTGBM_BUILD_DOC", False):
from unittest.mock import Mock # isort: skip

_LIB = Mock(ctypes.CDLL) # type: ignore
else:
_LIB = ctypes.cdll.LoadLibrary(_find_lib_path()[0])

0 comments on commit 8a9b356

Please sign in to comment.