Skip to content

Commit

Permalink
Merge branch 'master' into 6021-fix_single_row_contention
Browse files Browse the repository at this point in the history
  • Loading branch information
shiyu1994 authored Aug 15, 2023
2 parents c52a7d7 + e0d63b5 commit 601316b
Show file tree
Hide file tree
Showing 64 changed files with 804 additions and 540 deletions.
2 changes: 1 addition & 1 deletion .appveyor.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version: 4.0.0.{build}
version: 4.0.0.99.{build}

image: Visual Studio 2015
platform: x64
Expand Down
20 changes: 15 additions & 5 deletions .ci/lint_r_code.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,29 +33,37 @@ LINTERS_TO_USE <- list(
, "any_duplicated" = lintr::any_duplicated_linter()
, "any_is_na" = lintr::any_is_na_linter()
, "assignment" = lintr::assignment_linter()
, "boolean_arithmetic" = lintr::boolean_arithmetic_linter()
, "braces" = lintr::brace_linter()
, "class_equals" = lintr::class_equals_linter()
, "commas" = lintr::commas_linter()
, "duplicate_argument" = lintr::duplicate_argument_linter()
, "empty_assignment" = lintr::empty_assignment_linter()
, "equals_na" = lintr::equals_na_linter()
, "for_loop_index" = lintr::for_loop_index_linter()
, "function_left" = lintr::function_left_parentheses_linter()
, "implicit_integers" = lintr::implicit_integer_linter()
, "infix_spaces" = lintr::infix_spaces_linter()
, "inner_combine" = lintr::inner_combine_linter()
, "is_numeric" = lintr::is_numeric_linter()
, "fixed_regex" = lintr::fixed_regex_linter()
, "function_return" = lintr::function_return_linter()
, "lengths" = lintr::lengths_linter()
, "literal_coercion" = lintr::literal_coercion_linter()
, "long_lines" = lintr::line_length_linter(length = 120L)
, "matrix" = lintr::matrix_apply_linter()
, "missing_argument" = lintr::missing_argument_linter()
, "no_tabs" = lintr::no_tab_linter()
, "non_portable_path" = lintr::nonportable_path_linter()
, "numeric_leading_zero" = lintr::numeric_leading_zero_linter()
, "outer_negation" = lintr::outer_negation_linter()
, "package_hooks" = lintr::package_hooks_linter()
, "paste" = lintr::paste_linter()
, "quotes" = lintr::quotes_linter()
, "redundant_equals" = lintr::redundant_equals_linter()
, "regex_subset" = lintr::regex_subset_linter()
, "routine_registration" = lintr::routine_registration_linter()
, "semicolon" = lintr::semicolon_linter()
, "seq" = lintr::seq_linter()
, "single_quotes" = lintr::single_quotes_linter()
, "spaces_inside" = lintr::spaces_inside_linter()
, "spaces_left_parens" = lintr::spaces_left_parentheses_linter()
, "sprintf" = lintr::sprintf_linter()
Expand Down Expand Up @@ -96,9 +104,11 @@ LINTERS_TO_USE <- list(
, "??" = interactive_text
)
)
, "unneeded_concatenation" = lintr::unneeded_concatenation_linter()
, "unreachable_code" = lintr::unreachable_code_linter()
, "vector_logic" = lintr::vector_logic_linter()
, "unnecessary_concatenation" = lintr::unnecessary_concatenation_linter()
, "unnecessary_lambda" = lintr::unnecessary_lambda_linter()
, "unreachable_code" = lintr::unreachable_code_linter()
, "vector_logic" = lintr::vector_logic_linter()
, "whitespace" = lintr::whitespace_linter()
)

noquote(paste0(length(FILES_TO_LINT), " R files need linting"))
Expand Down
14 changes: 10 additions & 4 deletions .ci/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ if [[ $TASK == "lint" ]]; then
cpplint \
isort \
mypy \
'r-lintr>=3.0' \
'r-lintr>=3.1' \
ruff
source activate $CONDA_ENV
echo "Linting Python code"
Expand Down Expand Up @@ -119,15 +119,21 @@ if [[ $TASK == "check-docs" ]] || [[ $TASK == "check-links" ]]; then
exit 0
fi

# older versions of Dask are incompatible with pandas>=2.0, but not all conda packages' metadata accurately reflects that
#
# ref: https://github.com/microsoft/LightGBM/issues/6030
#
# CONSTRAINED_DEPENDENCIES holds a space-separated list of package specs, each wrapped
# in its own pair of single quotes. It is passed to the 'conda create' call below.
CONSTRAINED_DEPENDENCIES="'dask-core>=2023.5.0' 'distributed>=2023.5.0' 'pandas>=2.0'"
if [[ $PYTHON_VERSION == "3.7" ]]; then
    # on Python 3.7, leave dask-core and distributed unpinned and keep pandas below 2.0
    # (every spec must have both its opening and closing quote, otherwise adjacent
    # specs run together)
    CONSTRAINED_DEPENDENCIES="'dask-core' 'distributed' 'pandas<2.0'"
fi

# including python=version[build=*cpython] to ensure that conda doesn't fall back to pypy
conda create -q -y -n $CONDA_ENV \
${CONSTRAINED_DEPENDENCIES} \
cloudpickle \
dask-core \
distributed \
joblib \
matplotlib \
numpy \
pandas \
psutil \
pytest \
${CONDA_PYTHON_REQUIREMENT} \
Expand Down
3 changes: 3 additions & 0 deletions .ci/test_r_package.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,10 @@ fi
#
# `devscripts` is required for 'checkbashisms' (https://github.com/r-lib/actions/issues/111)
if [[ $OS_NAME == "linux" ]]; then
mkdir -p ~/.gnupg
echo "disable-ipv6" >> ~/.gnupg/dirmngr.conf
sudo apt-key adv \
--homedir ~/.gnupg \
--keyserver keyserver.ubuntu.com \
--recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 || exit -1
sudo add-apt-repository \
Expand Down
2 changes: 1 addition & 1 deletion .ci/test_windows.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ if (($env:TASK -eq "regular") -or (($env:APPVEYOR -eq "true") -and ($env:TASK -e
cd $env:BUILD_SOURCESDIRECTORY/examples/python-guide
@("import matplotlib", "matplotlib.use('Agg')") + (Get-Content "plot_example.py") | Set-Content "plot_example.py"
(Get-Content "plot_example.py").replace('graph.render(view=True)', 'graph.render(view=False)') | Set-Content "plot_example.py" # prevent interactive window mode
conda install -q -y -n $env:CONDA_ENV h5py ipywidgets notebook
conda install -q -y -n $env:CONDA_ENV "h5py>3.0" ipywidgets notebook
foreach ($file in @(Get-ChildItem *.py)) {
@("import sys, warnings", "warnings.showwarning = lambda message, category, filename, lineno, file=None, line=None: sys.stdout.write(warnings.formatwarning(message, category, filename, lineno, line))") + (Get-Content $file) | Set-Content $file
python $file ; Check-Output $?
Expand Down
54 changes: 1 addition & 53 deletions .github/CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
Expand Up @@ -7,56 +7,4 @@
# offer a reasonable automatic best-guess

# catch-all rule (this only gets matched if no rules below match)
* @guolinke @StrikerRUS @jameslamb @shiyu1994

# other catch-alls that will get matched if specific rules below are not matched
*.R @jameslamb @jmoralez
*.py @StrikerRUS @jmoralez @jameslamb @shiyu1994
*.cpp @guolinke @shiyu1994
*.h @guolinke @shiyu1994

# main C++ code
include/ @guolinke @shiyu1994
src/ @guolinke @shiyu1994
CMakeLists.txt @guolinke @jameslamb @StrikerRUS @shiyu1994
tests/c_api_test/ @guolinke @shiyu1994
tests/cpp_tests/ @guolinke @shiyu1994
tests/data/ @guolinke @shiyu1994
windows/ @guolinke @StrikerRUS @shiyu1994

# R code
build_r.R @jameslamb @StrikerRUS @jmoralez
build-cran-package.sh @jameslamb @StrikerRUS @jmoralez
R-package/ @jameslamb @jmoralez

# Python code
python-package/ @StrikerRUS @shiyu1994 @jameslamb @jmoralez

# Dask integration
python-package/lightgbm/dask.py @jameslamb @jmoralez
tests/python_package_test/test_dask.py @jameslamb @jmoralez

# helpers
helpers/ @StrikerRUS @guolinke

# CI administrative stuff
.ci/ @StrikerRUS @jameslamb
docs/ @StrikerRUS @jameslamb
examples/ @StrikerRUS @jameslamb @guolinke @jmoralez
*.yml @StrikerRUS @jameslamb
.vsts-ci.yml @StrikerRUS @jameslamb

# docker setup
docker/ @StrikerRUS @jameslamb
docker/dockerfile-cli @guolinke @shiyu1994 @StrikerRUS @jameslamb
docker/gpu/ @StrikerRUS @jameslamb
docker/dockerfile-python @StrikerRUS @shiyu1994 @jameslamb @jmoralez
docker/dockerfile-r @jameslamb @jmoralez

# GPU code
docs/GPU-*.rst @shiyu1994 @guolinke
src/treelearner/gpu_tree_learner.cpp @guolinke @shiyu1994
src/treelearner/tree_learner.cpp @guolinke @shiyu1994

# JAVA code
swig/ @guolinke @shiyu1994
* @guolinke @jameslamb @shiyu1994 @jmoralez
12 changes: 6 additions & 6 deletions R-package/R/callback.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ CB_ENV <- R6::R6Class(
)

# Format the evaluation metric string
format.eval.string <- function(eval_res, eval_err) {
.format_eval_string <- function(eval_res, eval_err) {

# Check for empty evaluation string
if (is.null(eval_res) || length(eval_res) == 0L) {
Expand All @@ -40,7 +40,7 @@ format.eval.string <- function(eval_res, eval_err) {

}

merge.eval.string <- function(env) {
.merge_eval_string <- function(env) {

# Check length of evaluation list
if (length(env$eval_list) <= 0L) {
Expand All @@ -63,7 +63,7 @@ merge.eval.string <- function(env) {
}

# Set error message
msg <- c(msg, format.eval.string(eval_res = env$eval_list[[j]], eval_err = eval_err))
msg <- c(msg, .format_eval_string(eval_res = env$eval_list[[j]], eval_err = eval_err))

}

Expand All @@ -86,11 +86,11 @@ cb_print_evaluation <- function(period) {
if ((i - 1L) %% period == 0L || is.element(i, c(env$begin_iteration, env$end_iteration))) {

# Merge evaluation string
msg <- merge.eval.string(env = env)
msg <- .merge_eval_string(env = env)

# Check if message is existing
if (nchar(msg) > 0L) {
print(merge.eval.string(env = env))
print(.merge_eval_string(env = env))
}

}
Expand Down Expand Up @@ -270,7 +270,7 @@ cb_early_stop <- function(stopping_rounds, first_metric_only, verbose) {

# Prepare to print if verbose
if (verbose) {
best_msg[[i]] <<- as.character(merge.eval.string(env = env))
best_msg[[i]] <<- as.character(.merge_eval_string(env = env))
}

} else {
Expand Down
10 changes: 9 additions & 1 deletion R-package/R/lgb.Booster.R
Original file line number Diff line number Diff line change
Expand Up @@ -928,6 +928,7 @@ NULL
#' , metric = "l2"
#' , min_data = 1L
#' , learning_rate = 1.0
#' , num_threads = 2L
#' )
#' valids <- list(test = dtest)
#' model <- lgb.train(
Expand Down Expand Up @@ -1086,7 +1087,10 @@ predict.lgb.Booster <- function(object,
#' X <- as.matrix(mtcars[, -1L])
#' y <- mtcars[, 1L]
#' dtrain <- lgb.Dataset(X, label = y, params = list(max_bin = 5L))
#' params <- list(min_data_in_leaf = 2L)
#' params <- list(
#' min_data_in_leaf = 2L
#' , num_threads = 2L
#' )
#' model <- lgb.train(
#' params = params
#' , data = dtrain
Expand Down Expand Up @@ -1231,6 +1235,7 @@ summary.lgb.Booster <- function(object, ...) {
#' , metric = "l2"
#' , min_data = 1L
#' , learning_rate = 1.0
#' , num_threads = 2L
#' )
#' valids <- list(test = dtest)
#' model <- lgb.train(
Expand Down Expand Up @@ -1296,6 +1301,7 @@ lgb.load <- function(filename = NULL, model_str = NULL) {
#' , metric = "l2"
#' , min_data = 1L
#' , learning_rate = 1.0
#' , num_threads = 2L
#' )
#' valids <- list(test = dtest)
#' model <- lgb.train(
Expand Down Expand Up @@ -1351,6 +1357,7 @@ lgb.save <- function(booster, filename, num_iteration = NULL) {
#' , metric = "l2"
#' , min_data = 1L
#' , learning_rate = 1.0
#' , num_threads = 2L
#' )
#' valids <- list(test = dtest)
#' model <- lgb.train(
Expand Down Expand Up @@ -1401,6 +1408,7 @@ lgb.dump <- function(booster, num_iteration = NULL) {
#' , metric = "l2"
#' , min_data = 1L
#' , learning_rate = 1.0
#' , num_threads = 2L
#' )
#' valids <- list(test = dtest)
#' model <- lgb.train(
Expand Down
7 changes: 3 additions & 4 deletions R-package/R/lgb.Dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -494,11 +494,10 @@ Dataset <- R6::R6Class(
if (info_len > 0L) {

# Get back fields
ret <- NULL
ret <- if (field_name == "group") {
integer(info_len)
if (field_name == "group") {
ret <- integer(info_len)
} else {
numeric(info_len)
ret <- numeric(info_len)
}

.Call(
Expand Down
2 changes: 0 additions & 2 deletions R-package/R/lgb.Predictor.R
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,6 @@ Predictor <- R6::R6Class(
start_iteration <- 0L
}

num_row <- 0L

# Check if data is a file name and not a matrix
if (identical(class(data), "character") && length(data) == 1L) {

Expand Down
4 changes: 0 additions & 4 deletions R-package/R/lgb.convert_with_rules.R
Original file line number Diff line number Diff line change
Expand Up @@ -116,10 +116,6 @@ lgb.convert_with_rules <- function(data, rules = NULL) {

column_classes <- .get_column_classes(df = data)

is_char <- which(column_classes == "character")
is_factor <- which(column_classes == "factor")
is_logical <- which(column_classes == "logical")

is_data_table <- data.table::is.data.table(x = data)
is_data_frame <- is.data.frame(data)

Expand Down
3 changes: 1 addition & 2 deletions R-package/R/lgb.cv.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ CVBooster <- R6::R6Class(
#' , metric = "l2"
#' , min_data = 1L
#' , learning_rate = 1.0
#' , num_threads = 2L
#' )
#' model <- lgb.cv(
#' params = params
Expand Down Expand Up @@ -224,8 +225,6 @@ lgb.cv <- function(params = list()
stop(sQuote("folds"), " must be a list with 2 or more elements that are vectors of indices for each CV-fold")
}

nfold <- length(folds)

} else {

if (nfold <= 1L) {
Expand Down
1 change: 1 addition & 0 deletions R-package/R/lgb.importance.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#' , max_depth = -1L
#' , min_data_in_leaf = 1L
#' , min_sum_hessian_in_leaf = 1.0
#' , num_threads = 2L
#' )
#' model <- lgb.train(
#' params = params
Expand Down
5 changes: 4 additions & 1 deletion R-package/R/lgb.interprete.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#' , max_depth = -1L
#' , min_data_in_leaf = 1L
#' , min_sum_hessian_in_leaf = 1.0
#' , num_threads = 2L
#' )
#' model <- lgb.train(
#' params = params
Expand Down Expand Up @@ -71,7 +72,9 @@ lgb.interprete <- function(model,
leaf_index_dt <- data.table::as.data.table(x = pred_mat)
leaf_index_mat_list <- lapply(
X = leaf_index_dt
, FUN = function(x) matrix(x, ncol = num_class, byrow = TRUE)
, FUN = matrix
, ncol = num_class
, byrow = TRUE
)

# Get list of trees
Expand Down
1 change: 1 addition & 0 deletions R-package/R/lgb.model.dt.tree.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#' , max_depth = -1L
#' , min_data_in_leaf = 1L
#' , min_sum_hessian_in_leaf = 1.0
#' , num_threads = 2L
#' )
#' model <- lgb.train(params, dtrain, 10L)
#'
Expand Down
1 change: 1 addition & 0 deletions R-package/R/lgb.plot.importance.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#' , learning_rate = 0.1
#' , min_data_in_leaf = 1L
#' , min_sum_hessian_in_leaf = 1.0
#' , num_threads = 2L
#' )
#'
#' model <- lgb.train(
Expand Down
1 change: 1 addition & 0 deletions R-package/R/lgb.plot.interpretation.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#' , max_depth = -1L
#' , min_data_in_leaf = 1L
#' , min_sum_hessian_in_leaf = 1.0
#' , num_threads = 2L
#' )
#' model <- lgb.train(
#' params = params
Expand Down
4 changes: 3 additions & 1 deletion R-package/R/lgb.restore_handle.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@
#' , agaricus.train$label
#' , params = list(objective = "binary")
#' , nrounds = 5L
#' , verbose = 0)
#' , verbose = 0
#' , num_threads = 2L
#' )
#' fname <- tempfile(fileext="rds")
#' saveRDS(model, fname)
#'
Expand Down
Loading

0 comments on commit 601316b

Please sign in to comment.