fix: remove inlining of sigmoid_fast pre 1.11- #597

avik-pal · 2024-07-16T01:10:40Z

Without this change, the following code errors on Julia 1.10:

using CUDA, NNlib

x = cu(rand(Float32, 10, 10))
b = cu(rand(Float32, 10, 10))

f = sigmoid_fast ∘ +

f.(x, b)

f = swish ∘ +

f.(x, b)

Error

ERROR: InvalidIRError: compiling MethodInstance for (::GPUArrays.var"#35#37")(::CUDA.CuKernelContext, ::CuDeviceMatrix{Float32, 1}, ::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{…}, Tuple{…}, ComposedFunction{…}, Tuple{…}}, ::Int64) resulted in invalid LLVM IR
Reason: unsupported dynamic function invocation (call to var"#_#103"(kw::Base.Pairs{Symbol, V, Tuple{Vararg{Symbol, N}}, NamedTuple{names, T}} where {V, N, names, T<:Tuple{Vararg{Any, N}}}, c::ComposedFunction, x...) @ Base operators.jl:1041)
Stacktrace:
 [1] ComposedFunction
   @ ./operators.jl:1041
 [2] _broadcast_getindex_evalf
   @ ./broadcast.jl:709
 [3] _broadcast_getindex
   @ ./broadcast.jl:682
 [4] getindex
   @ ./broadcast.jl:636
 [5] #35
   @ ~/.julia/packages/GPUArrays/8Y80U/src/host/broadcast.jl:70
Hint: catch this exception as `err` and call `code_typed(err; interactive = true)` to introspect the erronous code with Cthulhu.jl
Stacktrace:
  [1] check_ir(job::GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, args::LLVM.Module)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/validation.jl:147
  [2] macro expansion
    @ ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:458 [inlined]
  [3] macro expansion
    @ ~/.julia/packages/TimerOutputs/Lw5SP/src/TimerOutput.jl:253 [inlined]
  [4] macro expansion
    @ ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:457 [inlined]
  [5] emit_llvm(job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, only_entry::Bool, validate::Bool)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/utils.jl:103
  [6] emit_llvm
    @ ~/.julia/packages/GPUCompiler/Y4hSX/src/utils.jl:97 [inlined]
  [7] codegen(output::Symbol, job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:136
  [8] codegen
    @ ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:115 [inlined]
  [9] compile(target::Symbol, job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, strip::Bool, validate::Bool, only_entry::Bool)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:111
 [10] compile
    @ ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:103 [inlined]
 [11] #1145
    @ ~/.julia/packages/CUDA/Tl08O/src/compiler/compilation.jl:254 [inlined]
 [12] JuliaContext(f::CUDA.var"#1145#1148"{GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}}; kwargs::@Kwargs{})
    @ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:52
 [13] JuliaContext(f::Function)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/driver.jl:42
 [14] compile(job::GPUCompiler.CompilerJob)
    @ CUDA ~/.julia/packages/CUDA/Tl08O/src/compiler/compilation.jl:253
 [15] actual_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, world::UInt64, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::typeof(CUDA.compile), linker::typeof(CUDA.link))
    @ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/execution.jl:237
 [16] cached_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::Function, linker::Function)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/Y4hSX/src/execution.jl:151
 [17] macro expansion
    @ ~/.julia/packages/CUDA/Tl08O/src/compiler/execution.jl:369 [inlined]
 [18] macro expansion
    @ ./lock.jl:267 [inlined]
 [19] cufunction(f::GPUArrays.var"#35#37", tt::Type{Tuple{CUDA.CuKernelContext, CuDeviceMatrix{Float32, 1}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{…}, Tuple{…}, ComposedFunction{…}, Tuple{…}}, Int64}}; kwargs::@Kwargs{})
    @ CUDA ~/.julia/packages/CUDA/Tl08O/src/compiler/execution.jl:364
 [20] cufunction
    @ ~/.julia/packages/CUDA/Tl08O/src/compiler/execution.jl:361 [inlined]
 [21] macro expansion
    @ ~/.julia/packages/CUDA/Tl08O/src/compiler/execution.jl:112 [inlined]
 [22] #launch_heuristic#1204
    @ ~/.julia/packages/CUDA/Tl08O/src/gpuarrays.jl:17 [inlined]
 [23] launch_heuristic
    @ ~/.julia/packages/CUDA/Tl08O/src/gpuarrays.jl:15 [inlined]
 [24] _copyto!
    @ ~/.julia/packages/GPUArrays/8Y80U/src/host/broadcast.jl:78 [inlined]
 [25] copyto!
    @ ~/.julia/packages/GPUArrays/8Y80U/src/host/broadcast.jl:44 [inlined]
 [26] copy
    @ ~/.julia/packages/GPUArrays/8Y80U/src/host/broadcast.jl:29 [inlined]
 [27] materialize(bc::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{2, CUDA.DeviceMemory}, Nothing, ComposedFunction{typeof(swish), typeof(+)}, Tuple{CuArray{Float32, 2, CUDA.DeviceMemory}, CuArray{Float32, 2, CUDA.DeviceMemory}}})
    @ Base.Broadcast ./broadcast.jl:903
 [28] top-level scope
    @ REPL[13]:1
 [29] top-level scope
    @ none:1
Some type information was truncated. Use `show(err)` to see complete types.

avik-pal mentioned this pull request Jul 16, 2024

Remove special handling for swish and sigmoid_fast LuxDL/LuxLib.jl#92

Closed

fix: remove inlining of sigmoid_fast pre 1.11-

5b85d10

avik-pal force-pushed the ap/force_inlining branch from 5d9ceb3 to 5b85d10 Compare July 16, 2024 05:24

CarloLucibello approved these changes Jul 16, 2024

View reviewed changes

CarloLucibello merged commit 1ede301 into FluxML:master Jul 16, 2024
11 of 13 checks passed

avik-pal deleted the ap/force_inlining branch July 16, 2024 15:35

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

fix: remove inlining of sigmoid_fast pre 1.11- #597

fix: remove inlining of sigmoid_fast pre 1.11- #597

avik-pal commented Jul 16, 2024

fix: remove inlining of sigmoid_fast pre 1.11- #597

fix: remove inlining of sigmoid_fast pre 1.11- #597

Conversation

avik-pal commented Jul 16, 2024