fix: update to new reactant changes (#1140)
* fix: update to new reactant changes

* fix: use enzyme correctly

* fix: update training code

* feat: handle optimisers correctly

* fix: upstreamed Reactant patches

* fix: don't force ::Real

* fix: package versions and some test fixes

* test: try fixing load order

* revert: load order change
avik-pal authored Dec 30, 2024
1 parent ac2879b commit 3c3a432
Showing 35 changed files with 398 additions and 153 deletions.
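For orientation, here is a rough sketch of the Reactant-backed training path this commit touches. It is not part of the diff: the model, data, and hyperparameters are made up, and the exported names (`Dense`, `MSELoss`, `Adam`, `AutoEnzyme`) are assumed from current Lux, Optimisers, and ADTypes releases.

```julia
using Lux, Reactant, Enzyme, Optimisers, Random
using ADTypes: AutoEnzyme

# Toy model; Reactant.to_rarray recursively converts arrays to ConcreteRArrays.
model = Dense(4 => 2)
ps, st = Reactant.to_rarray(Lux.setup(Random.default_rng(), model))
x = Reactant.to_rarray(rand(Float32, 4, 8))
y = Reactant.to_rarray(rand(Float32, 2, 8))

ts = Lux.Training.TrainState(model, ps, st, Adam(0.001f0))

# With Reactant arrays in the TrainState, the gradient + optimiser step is traced
# and compiled on the first call; later calls reuse the cached compiled function.
_, loss, _, ts = Lux.Training.single_train_step!(AutoEnzyme(), MSELoss(), (x, y), ts)
```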
8 changes: 4 additions & 4 deletions Project.toml
@@ -1,7 +1,7 @@
name = "Lux"
uuid = "b2108857-7c20-44ae-9111-449ecde12c47"
authors = ["Avik Pal <[email protected]> and contributors"]
version = "1.4.2"
version = "1.4.3"

[deps]
ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
@@ -83,7 +83,7 @@ Adapt = "4.1"
ArgCheck = "2.3"
ArrayInterface = "7.17.1"
CUDA = "5.3.2"
ChainRulesCore = "1.24"
ChainRulesCore = "1.25"
Compat = "4.16"
ComponentArrays = "0.15.18"
ConcreteStructs = "0.2.3"
@@ -106,11 +106,11 @@ MPI = "0.20.19"
MacroTools = "0.5.13"
Markdown = "1.10"
NCCL = "0.1.1"
NNlib = "0.9.24"
NNlib = "0.9.26"
Optimisers = "0.4.1"
Preferences = "1.4.3"
Random = "1.10"
Reactant = "0.2.8"
Reactant = "0.2.12"
Reexport = "1.2.2"
ReverseDiff = "1.15"
SIMDTypes = "0.1"
6 changes: 3 additions & 3 deletions docs/Project.toml
@@ -33,7 +33,7 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
[compat]
ADTypes = "1.10"
Adapt = "4"
ChainRulesCore = "1.24"
ChainRulesCore = "1.25"
ComponentArrays = "0.15.18"
Documenter = "1.4"
DocumenterVitepress = "0.1.3"
@@ -51,12 +51,12 @@ LuxCore = "1.2"
LuxLib = "1.3.4"
LuxTestUtils = "1.5"
MLDataDevices = "1.6"
NNlib = "0.9.24"
NNlib = "0.9.26"
Optimisers = "0.4.1"
Pkg = "1.10"
Printf = "1.10"
Random = "1.10"
Reactant = "0.2.8"
Reactant = "0.2.12"
StableRNGs = "1"
StaticArrays = "1"
WeightInitializers = "1"
2 changes: 1 addition & 1 deletion docs/make.jl
@@ -29,7 +29,7 @@ pages = [
"tutorials/intermediate/1_NeuralODE.md",
"tutorials/intermediate/2_BayesianNN.md",
"tutorials/intermediate/3_HyperNet.md",
"tutorials/intermediate/4_PINN2DPDE.md"
"tutorials/intermediate/4_PINN2DPDE.md",
],
"Advanced" => [
"tutorials/advanced/1_GravitationalWaveForm.md"
13 changes: 11 additions & 2 deletions ext/LuxReactantExt/LuxReactantExt.jl
@@ -2,13 +2,22 @@ module LuxReactantExt

using Enzyme: Enzyme, Const, Duplicated, Active
using Optimisers: Optimisers
using Reactant: Reactant, @compile, TracedRArray, TracedRNumber
using Reactant: Reactant, @compile, AnyTracedRArray, TracedRArray, TracedRNumber
using Setfield: @set!
using Static: False

using Lux: Lux, LuxOps, Training
using Lux: Lux, LuxOps, Training, Utils
using Lux.Training: TrainingBackendCache, ReactantBackend

Lux.is_extension_loaded(::Val{:Reactant}) = true

Utils.to_rarray(x; kwargs...) = Reactant.to_rarray(x; kwargs...)

function Utils.promote_to(::Type{T}, x::Number) where {T <: Number}
    x isa Reactant.TracedType && return x
    return Reactant.ConcreteRNumber{T}(x)
end

include("patches.jl")
include("training.jl")

3 changes: 3 additions & 0 deletions ext/LuxReactantExt/patches.jl
@@ -1 +1,4 @@
Utils.vec(x::AnyTracedRArray) = Reactant.TracedUtils.materialize_traced_array(vec(x))

# XXX: Use PoolDims once EnzymeJAX supports stablehlo.reduce_window adjoint
Lux.calculate_pool_dims(g::Lux.GlobalPoolMode, ::TracedRArray) = g
86 changes: 56 additions & 30 deletions ext/LuxReactantExt/training.jl
@@ -1,3 +1,28 @@
mutable struct StatsAndNewStateWrapper
    stats::Any
    st::Any
end

function wrapped_objective_function(
        fn::F, model, ps, st, data, cache::StatsAndNewStateWrapper
) where {F}
    loss, stₙ, stats = fn(model, ps, st, data)
    cache.stats = stats
    cache.st = stₙ
    return loss
end

function compute_gradients_internal(objective_function::F, model, data, ps, st) where {F}
    stats_wrapper = StatsAndNewStateWrapper(nothing, nothing)
    res = Enzyme.gradient(
        Enzyme.set_abi(Enzyme.ReverseWithPrimal, Reactant.ReactantABI),
        Const(wrapped_objective_function), Const(objective_function),
        Const(model), ps, Const(st), Const(data), Const(stats_wrapper)
    )
    loss, dps = res.val, res.derivs[3]
    return dps, loss, stats_wrapper.stats, stats_wrapper.st
end

function Lux.Training.compute_gradients_impl(
        backend::ReactantBackend, objective_function::F,
        data, ts::Training.TrainState) where {F}
@@ -22,18 +47,33 @@ function Lux.Training.compute_gradients_impl(::ReactantBackend, obj_fn::F, data,
    return grads, loss, stats, ts
end

function compute_gradients_internal(objective_function::F, model, data, ps, st) where {F}
    dps = Enzyme.make_zero(ps)
    _, (loss, stₙ, stats) = Enzyme.autodiff(
        Enzyme.ReverseWithPrimal, Const(objective_function), Active, Const(model),
        Duplicated(ps, dps), Const(st), Const(data))
    return dps, loss, stats, stₙ
end

for inplace in ("!", "")
fname = Symbol(:single_train_step_impl, inplace)
internal_fn = Symbol(:compute_gradients_internal_and_step, inplace)
apply_gradients_fn = Symbol(:apply_gradients, inplace)
update_fn = Symbol(:update, inplace)

# Ideally users never hit this dispatch but it is still good to have as a fallback
@eval function Lux.Training.$(apply_gradients_fn)(
ts::Training.TrainState{<:TrainingBackendCache{ReactantBackend}}, grads
)
if hasfield(typeof(ts.cache.extras), :update_function)
update_function = ts.cache.extras.update_function
else
update_function = @compile Optimisers.$(update_fn)(
ts.optimizer_state, ts.parameters, grads)
@set! ts.cache.extras = merge(ts.cache.extras, (; update_function))
end

opt_state, ps = update_function(ts.optimizer_state, ts.parameters, grads)
@set! ts.parameters = ps
@set! ts.optimizer_state = opt_state
@set! ts.step = ts.step + 1
return ts
end

# XXX: Should we add a check to ensure the inputs to this function is same as the one
# used in the compiled function? We can re-trigger the compilation with a warning
@eval function Lux.Training.$(fname)(backend::ReactantBackend, objective_function::F,
data, ts::Training.TrainState) where {F}
compiled_grad_and_step_function = @compile $(internal_fn)(
Expand Down Expand Up @@ -68,27 +108,13 @@ for inplace in ("!", "")

return grads, loss, stats, ts
end
end

function compute_gradients_internal_and_step(objective_function::F, model, data, ps,
st, opt_state) where {F}
dps = Enzyme.make_zero(ps)
_, (loss, stₙ, stats) = Enzyme.autodiff(
Enzyme.set_abi(Enzyme.ReverseWithPrimal, Reactant.ReactantABI),
Const(objective_function), Active, Const(model),
Duplicated(ps, dps), Const(st), Const(data))
opt_state, ps = Optimisers.update(opt_state, ps, dps)
return dps, ps, loss, stats, stₙ, opt_state
end

function compute_gradients_internal_and_step!(objective_function::F, model, data, ps,
st, opt_state) where {F}
dps = Enzyme.make_zero(ps)
_, (loss, stₙ, stats) = Enzyme.autodiff(
Enzyme.set_abi(Enzyme.ReverseWithPrimal, Reactant.ReactantABI),
Const(objective_function), Active, Const(model),
Duplicated(ps, dps), Const(st), Const(data))
# XXX: Inplace updates not actually inplace
opt_state, ps = Optimisers.update!(opt_state, ps, dps)
return dps, ps, loss, stats, stₙ, opt_state
# XXX: Inplace version not actually inplace
@eval function $(internal_fn)(
objective_function::F, model, data, ps, st, opt_state) where {F}
dps, loss, stats, stₙ = compute_gradients_internal(
objective_function, model, data, ps, st)
opt_state, ps = Optimisers.$(update_fn)(opt_state, ps, dps)
return dps, ps, loss, stats, stₙ, opt_state
end
end
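The `apply_gradients` fallback above compiles `Optimisers.update`/`update!` once with `@compile` and caches the compiled function in `ts.cache.extras`, so the optimiser step is only traced the first time it is hit. The pattern in isolation looks roughly like this (a sketch, not from the diff; it assumes the parameters, gradients, and optimiser state have already been converted with `Reactant.to_rarray`):

```julia
using Reactant, Optimisers

ps = (; weight = rand(Float32, 2, 4))
gs = (; weight = rand(Float32, 2, 4))      # pretend gradients
st_opt = Optimisers.setup(Adam(0.001f0), ps)

# Move everything into Reactant's concrete types before tracing.
ps_ra, gs_ra, st_opt_ra = Reactant.to_rarray.((ps, gs, st_opt))

# Compile the update once...
update_compiled = Reactant.@compile Optimisers.update!(st_opt_ra, ps_ra, gs_ra)

# ...and reuse the compiled function for every subsequent optimiser step.
st_opt_ra, ps_ra = update_compiled(st_opt_ra, ps_ra, gs_ra)
```

Caching the compiled closure this way avoids re-tracing the optimiser update on every `apply_gradients` call.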
4 changes: 2 additions & 2 deletions lib/LuxLib/Project.toml
@@ -1,7 +1,7 @@
name = "LuxLib"
uuid = "82251201-b29d-42c6-8e01-566dec8acb11"
authors = ["Avik Pal <[email protected]> and contributors"]
version = "1.3.10"
version = "1.3.11"

[deps]
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
@@ -77,7 +77,7 @@ LuxCore = "1.2"
MKL = "0.7"
MLDataDevices = "1.6"
Markdown = "1.10"
NNlib = "0.9.24"
NNlib = "0.9.26"
Octavian = "0.3.28"
Preferences = "1.4.3"
Polyester = "0.7.15"
2 changes: 1 addition & 1 deletion lib/LuxLib/ext/LuxLibTrackerExt.jl
@@ -97,7 +97,7 @@ for RM in (:TrackedVector, :Nothing, :AbstractVector),
    Utils.is_tracked(RM, RV, S, B, XT) || continue

    @eval Tracker.@grad_from_chainrules LuxLib.Impl.batchnorm_cudnn(
        γ::$S, β::$B, x::$XT, rμ::$RM, rσ²::$RV, m::Real, ϵ::Real, training::StaticBool)
        γ::$S, β::$B, x::$XT, rμ::$RM, rσ²::$RV, m, ϵ, training::StaticBool)
end

# Utils extensions
2 changes: 1 addition & 1 deletion lib/LuxLib/ext/LuxLibcuDNNExt/LuxLibcuDNNExt.jl
@@ -21,7 +21,7 @@ include("batchnorm.jl")
function Impl.batchnorm(x::Union{<:CuArray{T, 2}, <:CuArray{T, 4}, <:CuArray{T, 5}},
        γ::Optional{<:CuVector{T}}, β::Optional{<:CuVector{T}},
        rμ::Optional{<:CuVector{T}}, rσ²::Optional{<:CuVector{T}},
        training::StaticBool, σ::F, m::Real, ϵ::Real) where {T <: cuDNNFloat, F}
        training::StaticBool, σ::F, m, ϵ) where {T <: cuDNNFloat, F}
    rμₙ, rσ²ₙ = Impl.get_batchnorm_statistics(x, rμ, rσ², training)
    y = Impl.batchnorm_cudnn(γ, β, x, rμₙ, rσ²ₙ, m, ϵ, training)[1]
    return Impl.activation!!(σ, y), safe_vec(rμₙ), safe_vec(rσ²ₙ)
2 changes: 1 addition & 1 deletion lib/LuxLib/src/api/batchnorm.jl
@@ -37,7 +37,7 @@ mean and variance.
function batchnorm(x::AbstractArray{T, N}, γ::Optional{<:AbstractVector},
        β::Optional{<:AbstractVector}, rμ::Optional{<:AbstractVector},
        rσ²::Optional{<:AbstractVector}, training::TrainingType, act::F=identity,
        momentum::Real=0.1f0, epsilon::Real=default_epsilon(x)) where {F, T, N}
        momentum=0.1f0, epsilon=default_epsilon(x)) where {F, T, N}
    σ = select_fastest_activation(act, x, γ, β, rμ, rσ²)
    y, rμ, rσ² = batchnorm_impl(
        x, γ, β, rμ, rσ², static_training_mode(training, x, γ, β, rμ, rσ²),
4 changes: 2 additions & 2 deletions lib/LuxLib/src/api/groupnorm.jl
@@ -1,6 +1,6 @@
@doc doc"""
    groupnorm(x, scale, bias, groups::Int, σ::F=identity,
        epsilon::Real=eps(eltype(x)) ^ (5 // 7))
        epsilon=eps(eltype(x)) ^ (5 // 7))
Group Normalization. For details see [1].
@@ -30,7 +30,7 @@ The normalized array is returned.
"""
function groupnorm(x::AbstractArray{<:Real, N}, scale::Optional{<:AbstractVector},
        bias::Optional{<:AbstractVector}, groups::Int, σ::F=identity,
        epsilon::Real=default_epsilon(x)) where {F, N}
        epsilon=default_epsilon(x)) where {F, N}
    assert_valid_groupnorm_arguments(x, scale, bias, groups)
    return groupnorm_impl(
        x, scale, bias, groups, select_fastest_activation(σ, x, scale, bias), epsilon)
4 changes: 2 additions & 2 deletions lib/LuxLib/src/api/instancenorm.jl
@@ -36,15 +36,15 @@ mean and variance.
"""
function instancenorm(x::AbstractArray, γ::Optional{<:AbstractVector},
        β::Optional{<:AbstractVector}, training::TrainingType,
        σ::F=identity, epsilon::Real=default_epsilon(x)) where {F}
        σ::F=identity, epsilon=default_epsilon(x)) where {F}
    # This API is kept for legacy purposes when we didn't support passing running stats
    return instancenorm(x, γ, β, nothing, nothing, training, σ, nothing, epsilon)
end

function instancenorm(x::AbstractArray, γ::Optional{<:AbstractVector},
        β::Optional{<:AbstractVector}, rμ::Optional{<:AbstractVector},
        rσ²::Optional{<:AbstractVector}, training::TrainingType, σ::F=identity,
        momentum::Optional{<:Real}=0.1f0, epsilon::Real=default_epsilon(x)) where {F}
        momentum::Optional{<:Real}=0.1f0, epsilon=default_epsilon(x)) where {F}
    assert_valid_instancenorm_arguments(x)

    y, rμₙ, rσ²ₙ = instancenorm_impl(
2 changes: 1 addition & 1 deletion lib/LuxLib/src/api/layernorm.jl
@@ -36,7 +36,7 @@ Normalized Array of same size as `x`.
"""
function layernorm(x::AbstractArray{xT, N}, scale::Optional{<:AbstractArray},
        bias::Optional{<:AbstractArray}, σ::F=identity, dims=1:(N - 1),
        epsilon::Real=default_epsilon(x)) where {F, xT, N}
        epsilon=default_epsilon(x)) where {F, xT, N}
    return layernorm_impl(
        x, scale, bias, select_fastest_activation(σ, x, scale, bias), dims, epsilon)
end
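These signature relaxations (dropping `::Real` from `momentum`/`epsilon`, per the "don't force ::Real" commit bullet) are what let non-`Real` scalars, in particular Reactant traced numbers, flow through the normalization entry points. A hedged sketch of the kind of call this enables (not from the diff; it assumes LuxLib and Reactant are loaded and that `batchnorm` traces cleanly under `@compile` — the helper `bn` is made up for illustration):

```julia
using LuxLib, Reactant

x = Reactant.to_rarray(rand(Float32, 4, 4, 3, 2))
γ = Reactant.to_rarray(ones(Float32, 3))
β = Reactant.to_rarray(zeros(Float32, 3))

# epsilon as a ConcreteRNumber rather than a plain `Real`; under tracing it becomes
# a TracedRNumber, which the previous `epsilon::Real` constraint would have rejected.
ϵ = Reactant.ConcreteRNumber{Float32}(1.0f-5)

# Batch statistics are used since no running statistics are passed.
bn(x, γ, β, ϵ) = first(batchnorm(x, γ, β, nothing, nothing, Val(true), identity, 0.1f0, ϵ))

bn_compiled = Reactant.@compile bn(x, γ, β, ϵ)
y = bn_compiled(x, γ, β, ϵ)
```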

4 comments on commit 3c3a432

@avik-pal (Member Author)

@avik-pal (Member Author)

@JuliaRegistrator register subdir=lib/LuxLib

@JuliaRegistrator

Registration pull request created: JuliaRegistries/General/122185

Tip: Release Notes

Did you know you can add release notes too? Just add markdown formatted text underneath the comment after the text
"Release notes:" and it will be added to the registry PR, and if TagBot is installed it will also be added to the
release that TagBot creates. i.e.

@JuliaRegistrator register

Release notes:

## Breaking changes

- blah

To add them here just re-invoke and the PR will be updated.

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v1.4.3 -m "<description of version>" 3c3a43266f688f5d7eed6fe1ca61e2ad2073dbe4
git push origin v1.4.3

@JuliaRegistrator

Registration pull request created: JuliaRegistries/General/122186


Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a LuxLib-v1.3.11 -m "<description of version>" 3c3a43266f688f5d7eed6fe1ca61e2ad2073dbe4
git push origin LuxLib-v1.3.11
