-
Notifications
You must be signed in to change notification settings - Fork 3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
specialize reshape
, ensuring it returns FixedSizeArray
#41
Conversation
This is analogous with the behavior for `Array` and should make type stability easier for some uses.
Codecov ReportAll modified and coverable lines are covered by tests ✅
Additional details and impacted files@@ Coverage Diff @@
## main #41 +/- ##
==========================================
+ Coverage 98.76% 98.83% +0.07%
==========================================
Files 1 1
Lines 81 86 +5
==========================================
+ Hits 80 85 +5
Misses 1 1 ☔ View full report in Codecov by Sentry. |
This is another interesting case where using FixedSizeArrays
@noinline f(v::AbstractVector{Float64}) = reshape(v, 2, 2)
g3() = f(FixedSizeVector{Float64}(undef, 3))
g4() = f(FixedSizeVector{Float64}(undef, 4))
h3() = f(Vector{Float64}(undef, 3))
h4() = f(Vector{Float64}(undef, 4))
code_llvm(g3, (); debuginfo=:none)
code_llvm(g4, (); debuginfo=:none)
code_llvm(h3, (); debuginfo=:none)
code_llvm(h4, (); debuginfo=:none) ; Function Signature: g3()
; Function Attrs: noreturn
define void @julia_g3_581() #0 {
top:
%gcframe1 = alloca [4 x ptr], align 16
call void @llvm.memset.p0.i64(ptr align 16 %gcframe1, i8 0, i64 32, i1 true)
%gc_slot_addr1 = getelementptr inbounds ptr, ptr %gcframe1, i64 3
%0 = getelementptr inbounds ptr, ptr %gcframe1, i64 2
%1 = alloca { ptr, [2 x i64] }, align 8
%2 = alloca { ptr, [1 x i64] }, align 8
%thread_ptr = call ptr asm "movq %fs:0, $0", "=r"() #9
%tls_ppgcstack = getelementptr i8, ptr %thread_ptr, i64 -8
%tls_pgcstack = load ptr, ptr %tls_ppgcstack, align 8
store i64 8, ptr %gcframe1, align 16
%frame.prev = getelementptr inbounds ptr, ptr %gcframe1, i64 1
%task.gcstack = load ptr, ptr %tls_pgcstack, align 8
store ptr %task.gcstack, ptr %frame.prev, align 8
store ptr %gcframe1, ptr %tls_pgcstack, align 8
%"Memory{Float64}[]" = call ptr @jl_alloc_genericmemory(ptr nonnull @"+Core.GenericMemory#583.jit", i64 3)
store ptr %"Memory{Float64}[]", ptr %gc_slot_addr1, align 8
store ptr %"Memory{Float64}[]", ptr %2, align 8
%.fca.1.0.gep = getelementptr inbounds { ptr, [1 x i64] }, ptr %2, i64 0, i32 1, i64 0
store i64 3, ptr %.fca.1.0.gep, align 8
call void @j_f_585(ptr noalias nocapture noundef nonnull sret({ ptr, [2 x i64] }) %1, ptr noalias nocapture noundef nonnull %0, ptr nocapture nonnull readonly %2)
call void @llvm.trap()
unreachable
}
; Function Signature: g4()
define void @julia_g4_586(ptr noalias nocapture noundef nonnull sret({ ptr, [2 x i64] }) align 8 dereferenceable(24) %sret_return, ptr noalias nocapture noundef nonnull align 8 dereferenceable(8) %return_roots) #0 {
top:
%gcframe1 = alloca [4 x ptr], align 16
call void @llvm.memset.p0.i64(ptr align 16 %gcframe1, i8 0, i64 32, i1 true)
%gc_slot_addr1 = getelementptr inbounds ptr, ptr %gcframe1, i64 3
%0 = getelementptr inbounds ptr, ptr %gcframe1, i64 2
%1 = alloca { ptr, [2 x i64] }, align 8
%2 = alloca { ptr, [1 x i64] }, align 8
%thread_ptr = call ptr asm "movq %fs:0, $0", "=r"() #11
%tls_ppgcstack = getelementptr i8, ptr %thread_ptr, i64 -8
%tls_pgcstack = load ptr, ptr %tls_ppgcstack, align 8
store i64 8, ptr %gcframe1, align 16
%frame.prev = getelementptr inbounds ptr, ptr %gcframe1, i64 1
%task.gcstack = load ptr, ptr %tls_pgcstack, align 8
store ptr %task.gcstack, ptr %frame.prev, align 8
store ptr %gcframe1, ptr %tls_pgcstack, align 8
%"Memory{Float64}[]" = call ptr @jl_alloc_genericmemory(ptr nonnull @"+Core.GenericMemory#589.jit", i64 4)
store ptr %"Memory{Float64}[]", ptr %gc_slot_addr1, align 8
store ptr %"Memory{Float64}[]", ptr %2, align 8
%.fca.1.0.gep = getelementptr inbounds { ptr, [1 x i64] }, ptr %2, i64 0, i32 1, i64 0
store i64 4, ptr %.fca.1.0.gep, align 8
call void @j_f_591(ptr noalias nocapture noundef nonnull sret({ ptr, [2 x i64] }) %1, ptr noalias nocapture noundef nonnull %0, ptr nocapture nonnull readonly %2)
%3 = load ptr, ptr %1, align 8
store ptr %3, ptr %return_roots, align 8
call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(24) %sret_return, ptr noundef nonnull align 8 dereferenceable(24) %1, i64 24, i1 false)
%frame.prev5 = load ptr, ptr %frame.prev, align 8
store ptr %frame.prev5, ptr %tls_pgcstack, align 8
ret void
}
; Function Signature: h3()
define nonnull ptr @julia_h3_592() #0 {
top:
%gcframe1 = alloca [3 x ptr], align 16
call void @llvm.memset.p0.i64(ptr align 16 %gcframe1, i8 0, i64 24, i1 true)
%thread_ptr = call ptr asm "movq %fs:0, $0", "=r"() #9
%tls_ppgcstack = getelementptr i8, ptr %thread_ptr, i64 -8
%tls_pgcstack = load ptr, ptr %tls_ppgcstack, align 8
store i64 4, ptr %gcframe1, align 16
%frame.prev = getelementptr inbounds ptr, ptr %gcframe1, i64 1
%task.gcstack = load ptr, ptr %tls_pgcstack, align 8
store ptr %task.gcstack, ptr %frame.prev, align 8
store ptr %gcframe1, ptr %tls_pgcstack, align 8
%"Memory{Float64}[]" = call ptr @jl_alloc_genericmemory(ptr nonnull @"+Core.GenericMemory#594.jit", i64 3)
%.data_ptr = getelementptr inbounds { i64, ptr }, ptr %"Memory{Float64}[]", i64 0, i32 1
%0 = load ptr, ptr %.data_ptr, align 8
%gc_slot_addr_0 = getelementptr inbounds ptr, ptr %gcframe1, i64 2
store ptr %"Memory{Float64}[]", ptr %gc_slot_addr_0, align 16
%ptls_field = getelementptr inbounds ptr, ptr %tls_pgcstack, i64 2
%ptls_load = load ptr, ptr %ptls_field, align 8
%"new::Array" = call noalias nonnull align 8 dereferenceable(32) ptr @ijl_gc_pool_alloc_instrumented(ptr %ptls_load, i32 800, i32 32, i64 131739362545776) #7
%"new::Array.tag_addr" = getelementptr inbounds i64, ptr %"new::Array", i64 -1
store atomic i64 131739362545776, ptr %"new::Array.tag_addr" unordered, align 8
%1 = getelementptr inbounds ptr, ptr %"new::Array", i64 1
store ptr %0, ptr %"new::Array", align 8
store ptr %"Memory{Float64}[]", ptr %1, align 8
%"new::Array.size_ptr" = getelementptr inbounds i8, ptr %"new::Array", i64 16
store i64 3, ptr %"new::Array.size_ptr", align 8
store ptr %"new::Array", ptr %gc_slot_addr_0, align 16
%2 = call nonnull ptr @j_f_598(ptr nonnull %"new::Array")
%frame.prev10 = load ptr, ptr %frame.prev, align 8
store ptr %frame.prev10, ptr %tls_pgcstack, align 8
ret ptr %2
}
; Function Signature: h4()
define nonnull ptr @julia_h4_599() #0 {
top:
%gcframe1 = alloca [3 x ptr], align 16
call void @llvm.memset.p0.i64(ptr align 16 %gcframe1, i8 0, i64 24, i1 true)
%thread_ptr = call ptr asm "movq %fs:0, $0", "=r"() #9
%tls_ppgcstack = getelementptr i8, ptr %thread_ptr, i64 -8
%tls_pgcstack = load ptr, ptr %tls_ppgcstack, align 8
store i64 4, ptr %gcframe1, align 16
%frame.prev = getelementptr inbounds ptr, ptr %gcframe1, i64 1
%task.gcstack = load ptr, ptr %tls_pgcstack, align 8
store ptr %task.gcstack, ptr %frame.prev, align 8
store ptr %gcframe1, ptr %tls_pgcstack, align 8
%"Memory{Float64}[]" = call ptr @jl_alloc_genericmemory(ptr nonnull @"+Core.GenericMemory#601.jit", i64 4)
%.data_ptr = getelementptr inbounds { i64, ptr }, ptr %"Memory{Float64}[]", i64 0, i32 1
%0 = load ptr, ptr %.data_ptr, align 8
%gc_slot_addr_0 = getelementptr inbounds ptr, ptr %gcframe1, i64 2
store ptr %"Memory{Float64}[]", ptr %gc_slot_addr_0, align 16
%ptls_field = getelementptr inbounds ptr, ptr %tls_pgcstack, i64 2
%ptls_load = load ptr, ptr %ptls_field, align 8
%"new::Array" = call noalias nonnull align 8 dereferenceable(32) ptr @ijl_gc_pool_alloc_instrumented(ptr %ptls_load, i32 800, i32 32, i64 131739362545776) #7
%"new::Array.tag_addr" = getelementptr inbounds i64, ptr %"new::Array", i64 -1
store atomic i64 131739362545776, ptr %"new::Array.tag_addr" unordered, align 8
%1 = getelementptr inbounds ptr, ptr %"new::Array", i64 1
store ptr %0, ptr %"new::Array", align 8
store ptr %"Memory{Float64}[]", ptr %1, align 8
%"new::Array.size_ptr" = getelementptr inbounds i8, ptr %"new::Array", i64 16
store i64 4, ptr %"new::Array.size_ptr", align 8
store ptr %"new::Array", ptr %gc_slot_addr_0, align 16
%2 = call nonnull ptr @j_f_605(ptr nonnull %"new::Array")
%frame.prev10 = load ptr, ptr %frame.prev, align 8
store ptr %frame.prev10, ptr %tls_pgcstack, align 8
ret ptr %2
} Julia correctly realises that Also, inferred effects are much better:
|
And with the inlined ; Function Signature: g3()
; Function Attrs: noreturn
define void @julia_g3_2050() #0 {
top:
%gcframe1 = alloca [3 x ptr], align 16
call void @llvm.memset.p0.i64(ptr align 16 %gcframe1, i8 0, i64 24, i1 true)
%thread_ptr = call ptr asm "movq %fs:0, $0", "=r"() #9
%tls_ppgcstack = getelementptr i8, ptr %thread_ptr, i64 -8
%tls_pgcstack = load ptr, ptr %tls_ppgcstack, align 8
store i64 4, ptr %gcframe1, align 16
%frame.prev = getelementptr inbounds ptr, ptr %gcframe1, i64 1
%task.gcstack = load ptr, ptr %tls_pgcstack, align 8
store ptr %task.gcstack, ptr %frame.prev, align 8
store ptr %gcframe1, ptr %tls_pgcstack, align 8
%0 = call [1 x ptr] @j_DimensionMismatch_2053(ptr nonnull @"jl_global#2054.jit")
%gc_slot_addr_0 = getelementptr inbounds ptr, ptr %gcframe1, i64 2
%1 = extractvalue [1 x ptr] %0, 0
store ptr %1, ptr %gc_slot_addr_0, align 16
%ptls_field = getelementptr inbounds ptr, ptr %tls_pgcstack, i64 2
%ptls_load = load ptr, ptr %ptls_field, align 8
%"box::DimensionMismatch" = call noalias nonnull align 8 dereferenceable(16) ptr @ijl_gc_pool_alloc_instrumented(ptr %ptls_load, i32 752, i32 16, i64 131739368165344) #7
%"box::DimensionMismatch.tag_addr" = getelementptr inbounds i64, ptr %"box::DimensionMismatch", i64 -1
store atomic i64 131739368165344, ptr %"box::DimensionMismatch.tag_addr" unordered, align 8
store ptr %1, ptr %"box::DimensionMismatch", align 8
call void @ijl_throw(ptr nonnull %"box::DimensionMismatch")
unreachable
}
; Function Signature: g4()
define void @julia_g4_2057(ptr noalias nocapture noundef nonnull sret({ ptr, [2 x i64] }) align 8 dereferenceable(24) %sret_return, ptr noalias nocapture noundef nonnull align 8 dereferenceable(8) %return_roots) #0 {
top:
%"Memory{Float64}[]" = call ptr @jl_alloc_genericmemory(ptr nonnull @"+Core.GenericMemory#2060.jit", i64 4)
store ptr %"Memory{Float64}[]", ptr %return_roots, align 8
store ptr %"Memory{Float64}[]", ptr %sret_return, align 8
%sret_return.repack1 = getelementptr inbounds { ptr, [2 x i64] }, ptr %sret_return, i64 0, i32 1
store i64 2, ptr %sret_return.repack1, align 8
%sret_return.repack1.repack3 = getelementptr inbounds { ptr, [2 x i64] }, ptr %sret_return, i64 0, i32 1, i64 1
store i64 2, ptr %sret_return.repack1.repack3, align 8
ret void
}
; Function Signature: h3()
define nonnull ptr @julia_h3_2061() #0 {
L8:
%"Memory{Float64}[]" = call ptr @jl_alloc_genericmemory(ptr nonnull @"+Core.GenericMemory#2063.jit", i64 3)
call void @j_throw_dmrsa_2068(ptr nocapture nonnull readonly @"_j_const#2", i64 signext 3) #7
unreachable
}
; Function Signature: h4()
define nonnull ptr @julia_h4_2072() #0 {
L20:
%gcframe1 = alloca [3 x ptr], align 16
call void @llvm.memset.p0.i64(ptr align 16 %gcframe1, i8 0, i64 24, i1 true)
%thread_ptr = call ptr asm "movq %fs:0, $0", "=r"() #9
%tls_ppgcstack = getelementptr i8, ptr %thread_ptr, i64 -8
%tls_pgcstack = load ptr, ptr %tls_ppgcstack, align 8
store i64 4, ptr %gcframe1, align 16
%frame.prev = getelementptr inbounds ptr, ptr %gcframe1, i64 1
%task.gcstack = load ptr, ptr %tls_pgcstack, align 8
store ptr %task.gcstack, ptr %frame.prev, align 8
store ptr %gcframe1, ptr %tls_pgcstack, align 8
%"Memory{Float64}[]" = call ptr @jl_alloc_genericmemory(ptr nonnull @"+Core.GenericMemory#2074.jit", i64 4)
%.data_ptr = getelementptr inbounds { i64, ptr }, ptr %"Memory{Float64}[]", i64 0, i32 1
%0 = load ptr, ptr %.data_ptr, align 8
%.unbox = load i64, ptr %"Memory{Float64}[]", align 16
%1 = icmp eq i64 %.unbox, 4
br i1 %1, label %L26, label %L23
L23: ; preds = %L20
%gc_slot_addr_0 = getelementptr inbounds ptr, ptr %gcframe1, i64 2
store ptr %"Memory{Float64}[]", ptr %gc_slot_addr_0, align 16
%2 = call nonnull ptr @jlplt_jl_genericmemory_slice_2081_got.jit(ptr nonnull %"Memory{Float64}[]", ptr nonnull %0, i64 4)
%.data_ptr17 = getelementptr inbounds { i64, ptr }, ptr %2, i64 0, i32 1
%3 = load ptr, ptr %.data_ptr17, align 8
br label %L26
L26: ; preds = %L23, %L20
%.pn22 = phi ptr [ %3, %L23 ], [ %0, %L20 ]
%.pn20 = phi ptr [ %2, %L23 ], [ %"Memory{Float64}[]", %L20 ]
%gc_slot_addr_026 = getelementptr inbounds ptr, ptr %gcframe1, i64 2
store ptr %.pn20, ptr %gc_slot_addr_026, align 16
%ptls_field = getelementptr inbounds ptr, ptr %tls_pgcstack, i64 2
%ptls_load = load ptr, ptr %ptls_field, align 8
%"new::Array" = call noalias nonnull align 8 dereferenceable(48) ptr @ijl_gc_pool_alloc_instrumented(ptr %ptls_load, i32 848, i32 48, i64 131739509020240) #6
%"new::Array.tag_addr" = getelementptr inbounds i64, ptr %"new::Array", i64 -1
store atomic i64 131739509020240, ptr %"new::Array.tag_addr" unordered, align 8
%4 = getelementptr inbounds ptr, ptr %"new::Array", i64 1
store ptr %.pn22, ptr %"new::Array", align 8
store ptr %.pn20, ptr %4, align 8
%"new::Array.size_ptr" = getelementptr inbounds i8, ptr %"new::Array", i64 16
call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) %"new::Array.size_ptr", ptr noundef nonnull align 8 dereferenceable(16) @"_j_const#2", i64 16, i1 false)
%frame.prev32 = load ptr, ptr %frame.prev, align 8
store ptr %frame.prev32, ptr %tls_pgcstack, align 8
ret ptr %"new::Array"
}
julia> code_llvm((); debuginfo=:none) do; FixedSizeMatrix{Float64}(undef, 2, 2); end ; Function Signature: var"#12"()
define void @"julia_#12_2862"(ptr noalias nocapture noundef nonnull sret({ ptr, [2 x i64] }) align 8 dereferenceable(24) %sret_return, ptr noalias nocapture noundef nonnull align 8 dereferenceable(8) %return_roots) #0 {
top:
%"Memory{Float64}[]" = call ptr @jl_alloc_genericmemory(ptr nonnull @"+Core.GenericMemory#2865.jit", i64 4)
store ptr %"Memory{Float64}[]", ptr %return_roots, align 8
store ptr %"Memory{Float64}[]", ptr %sret_return, align 8
%sret_return.repack1 = getelementptr inbounds { ptr, [2 x i64] }, ptr %sret_return, i64 0, i32 1
store i64 2, ptr %sret_return.repack1, align 8
%sret_return.repack1.repack3 = getelementptr inbounds { ptr, [2 x i64] }, ptr %sret_return, i64 0, i32 1, i64 1
store i64 2, ptr %sret_return.repack1.repack3, align 8
ret void
} |
I forgot to mention one thing that possibly makes this PR incorrect (not sure). Because of JuliaLang/julia#28611. I don't know if that old issue is still relevant, so I raised this topic on Slack, in the #internals channel. |
I guess we don't care about |
Well I didn't know if the |
Based on JuliaLang/julia#54250 (comment) the problem is the |
This works correctly as is. The problem with this for |
It could be spotted also in the LLVM code above, but it may be worth pointing out that
without
|
This is analogous with the behavior for
Array
and should make type stability easier for some uses.