From 5e5a374379eeb64f1942ea37fc213434c1ac3f36 Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Fri, 19 Jan 2024 22:05:34 -0600 Subject: [PATCH 01/34] amos: add SciPy license --- src/amos/LICENSE.md | 99 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 src/amos/LICENSE.md diff --git a/src/amos/LICENSE.md b/src/amos/LICENSE.md new file mode 100644 index 00000000..2952d1b8 --- /dev/null +++ b/src/amos/LICENSE.md @@ -0,0 +1,99 @@ +The code implementation in this folder is translated from the C code in [`scipy/special/_amos.c`](https://github.com/scipy/scipy/blob/main/scipy/special/_amos.c) to julia. + +The license for the SciPy project is [BSD 3-Clause "New" or "Revised" License](https://github.com/scipy/scipy/blob/main/LICENSE.txt). + +See below for the license. +``` +/* + * This file accompanied with the implementation file _amos.c is a + * C translation of the Fortran code written by D.E. Amos with the + * following original description: + * + * + * A Portable Package for Bessel Functions of a Complex Argument + * and Nonnegative Order + * + * This algorithm is a package of subroutines for computing Bessel + * functions and Airy functions. The routines are updated + * versions of those routines found in TOMS algorithm 644. + * + * Disclaimer: + * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * ISSUED BY SANDIA LABORATORIES, + * A PRIME CONTRACTOR TO THE + * UNITED STATES DEPARTMENT OF ENERGY + * * * * * * * * * * * * * * NOTICE * * * * * * * * * * * * * * * + * THIS REPORT WAS PREPARED AS AN ACCOUNT OF WORK SPONSORED BY THE + * UNITED STATES GOVERNMENT. NEITHER THE UNITED STATES NOR THE + * UNITED STATES DEPARTMENT OF ENERGY, NOR ANY OF THEIR + * EMPLOYEES, NOR ANY OF THEIR CONTRACTORS, SUBCONTRACTORS, OR THEIR + * EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY + * LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS + * OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT OR PROCESS + * DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE + * PRIVATELY OWNED RIGHTS. + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * THIS CODE HAS BEEN APPROVED FOR UNLIMITED RELEASE. + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * + * + * + * The original Fortran code can be found at https://www.netlib.org/amos/ + * + * References: + * + * [1]: Abramowitz, M. and Stegun, I. A., Handbook of Mathematical + * Functions, NBS Applied Math Series 55, U.S. Dept. of Commerce, + * Washington, D.C., 1955 + * + * [2]: Amos, D. E., Algorithm 644, A Portable Package For Bessel + * Functions of a Complex Argument and Nonnegative Order, ACM + * Transactions on Mathematical Software, Vol. 12, No. 3, + * September 1986, Pages 265-273, DOI:10.1145/7921.214331 + * + * [3]: Amos, D. E., Remark on Algorithm 644, ACM Transactions on + * Mathematical Software, Vol. 16, No. 4, December 1990, Page + * 404, DOI:10.1145/98267.98299 + * + * [4]: Amos, D. E., A remark on Algorithm 644: "A portable package + * for Bessel functions of a complex argument and nonnegative order", + * ACM Transactions on Mathematical Software, Vol. 21, No. 4, + * December 1995, Pages 388-393, DOI:10.1145/212066.212078 + * + * [5]: Cody, W. J., Algorithm 665, MACHAR: A Subroutine to + * Dynamically Determine Machine Parameters, ACM Transactions on + * Mathematical Software, Vol. 14, No. 4, December 1988, Pages + * 303-311, DOI:10.1145/50063.51907 + * + */ + +/* + * Copyright (C) 2024 SciPy developers + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * a. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * b. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * c. Names of the SciPy Developers may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ +``` From 780d7ec86d59c823771be91daead9d5a778c723f Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Sat, 20 Jan 2024 00:26:49 -0600 Subject: [PATCH 02/34] amos: add const array: I1_MACH, D1_MACH --- src/amos/const.jl | 107 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 src/amos/const.jl diff --git a/src/amos/const.jl b/src/amos/const.jl new file mode 100644 index 00000000..402ff5fa --- /dev/null +++ b/src/amos/const.jl @@ -0,0 +1,107 @@ +# SPDX-License-Identifier: BSD-3-Clause OR MIT + +# TODO: Use NamedTuple? +""" + I1_MACH :: Int32 + +integer machine dependent constants. + +`I1_MACH` can be used to obtain machine-dependent parameters +for the local machine environment. + +# fortran comments +```fortran +C I/O unit numbers. +C I1MACH( 1) = the standard input unit. +C I1MACH( 2) = the standard output unit. +C I1MACH( 3) = the standard punch unit. +C I1MACH( 4) = the standard error message unit. +C +C Words. +C I1MACH( 5) = the number of bits per integer storage unit. +C I1MACH( 6) = the number of characters per integer storage unit. +C Integers. +C I1MACH( 7) = A, the base. +C I1MACH( 8) = S, the number of base-A digits. +C I1MACH( 9) = A**S - 1, the largest magnitude. +C I1MACH(10) = B, the base. +C +C Single-Precision +C I1MACH(11) = T, the number of base-B digits. +C I1MACH(12) = EMIN, the smallest exponent E. +C I1MACH(13) = EMAX, the largest exponent E. +C Double-Precision +C I1MACH(14) = T, the number of base-B digits. +C I1MACH(15) = EMIN, the smallest exponent E. +C I1MACH(16) = EMAX, the largest exponent E. +``` + +# Impl Ref +- [`openspecfun/amos/i1mach.f`](https://github.com/JuliaMath/openspecfun/blob/v0.5.6/amos/i1mach.f) +- [`scipy/scipy/special/_amos.c`](https://github.com/scipy/scipy/blob/b882f1b7ebe55e534f29a8d68a54e4ecd30aeb1a/scipy/special/_amos.c#L134-L151) +""" +const I1_MACH = Int32[ +# I/O unit numbers. + 5, # [ 1] standard input + 6, # [ 2] standard output + 7, # [ 3] standard punch + 0, # [ 4] standard error + +# Words. + 32, # [ 5] bits per integer + 4, # [ 6] sizeof(Int32) +# Integers. + 2, # [ 7] base for integers + 31, # [ 8] digits of integer base + 2147483647, # [ 9] typemax(Int32) + +# Floating-Point Numbers. + 2, # [10] FLT_RADIX; + # Single-Precision :: Float32 + 24, # [11] FLT_MANT_DIG; + -126, # [12] FLT_MIN_EXP; + 128, # [13] FLT_MAX_EXP; + # Double-Precision :: Float64 + 53, # [14] DBL_MANT_DIG; + -1021, # [15] DBL_MIN_EXP; + 1024 # [16] DBL_MAX_EXP; +] # I1_MACH + +""" + D1_MACH :: Float64 + +double precision machine dependent constants. + +`D1_MACH` can be used to obtain machine-dependent parameters +for the local machine environment. + +# fortran comments +```fortran +C D1MACH( 1) = B**(EMIN-1), +C D1MACH( 2) = B**EMAX*(1 - B**(-T)), the largest magnitude. +C D1MACH( 3) = B**(-T), the smallest relative spacing. +C D1MACH( 4) = B**(1-T), the largest relative spacing. +C D1MACH( 5) = LOG10(B) +C +C I1MACH(10) = B, the base. +C I1MACH(14) = T, the number of base-B digits. +C I1MACH(15) = EMIN, the smallest exponent E. +C I1MACH(16) = EMAX, the largest exponent E. +``` + +# Impl Ref +- [`openspecfun/amos/d1mach.f`](https://github.com/JuliaMath/openspecfun/blob/v0.5.6/amos/d1mach.f) +- [`scipy/scipy/special/_amos.c`](https://github.com/scipy/scipy/blob/b882f1b7ebe55e534f29a8d68a54e4ecd30aeb1a/scipy/special/_amos.c#L126-L132) +""" +const D1_MACH = Float64[ +# [1] smallest positive magnitude: np.finfo(np.float64).tiny + 2.2250738585072014e-308, +# [2] largest magnitude: np.finfo(np.float64).max + 1.7976931348623157e+308, +# [3] smallest relative spacing: eps(Float64)/2 + 1.1102230246251565e-16, +# [4] largest relative spacing: eps(Float64) + 2.220446049250313e-16, +# [5] log10(2) + 0.3010299956639812, +] # D1_MACH From cfa7c7ec83ebfb7f1b9050770127e6d014d53806 Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Sat, 20 Jan 2024 10:36:35 -0600 Subject: [PATCH 03/34] amos: add helper func and test --- src/SpecialFunctions.jl | 1 + src/amos/AMOS.jl | 5 +++++ src/amos/helper.jl | 3 +++ test/amos/helper.jl | 4 ++++ test/runtests.jl | 5 +++++ 5 files changed, 18 insertions(+) create mode 100644 src/amos/AMOS.jl create mode 100644 src/amos/helper.jl create mode 100644 test/amos/helper.jl diff --git a/src/SpecialFunctions.jl b/src/SpecialFunctions.jl index 292421d8..21ebf5bd 100644 --- a/src/SpecialFunctions.jl +++ b/src/SpecialFunctions.jl @@ -80,6 +80,7 @@ export cosint, lbinomial +include("amos/AMOS.jl") include("bessel.jl") include("erf.jl") include("ellip.jl") diff --git a/src/amos/AMOS.jl b/src/amos/AMOS.jl new file mode 100644 index 00000000..51fcdc55 --- /dev/null +++ b/src/amos/AMOS.jl @@ -0,0 +1,5 @@ +module AMOS +include("const.jl") +include("helper.jl") + +end # AMOS diff --git a/src/amos/helper.jl b/src/amos/helper.jl new file mode 100644 index 00000000..76ded628 --- /dev/null +++ b/src/amos/helper.jl @@ -0,0 +1,3 @@ +function uchk() + true +end diff --git a/test/amos/helper.jl b/test/amos/helper.jl new file mode 100644 index 00000000..e860e2f3 --- /dev/null +++ b/test/amos/helper.jl @@ -0,0 +1,4 @@ + +@testset "AMOS.uchk" begin + @test AMOS.uchk() +end diff --git a/test/runtests.jl b/test/runtests.jl index 69dcafe7..7872f8f4 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -8,6 +8,8 @@ using Test using Base.MathConstants: γ using SpecialFunctions: AmosException, f64 +using SpecialFunctions.AMOS +const AMOS = SpecialFunctions.AMOS # useful test functions for relative error, which differ from isapprox # relerr separately looks at the real and imaginary parts if one of the arguments is complex @@ -22,6 +24,9 @@ checktol(err::Float64) = err ≤ 1e-13 tests = [ +# AMOS test + "amos/helper", +# "bessel", "beta_inc", "betanc", From 40bcb26d3b17692a5db2a613cc861c9042e9f431 Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Sat, 20 Jan 2024 12:05:56 -0600 Subject: [PATCH 04/34] amos: impl helper func `uchk` --- src/amos/AMOS.jl | 1 + src/amos/helper.jl | 34 ++++++++++++++++++++++++++++++++-- src/amos/warp.jl | 16 ++++++++++++++++ 3 files changed, 49 insertions(+), 2 deletions(-) create mode 100644 src/amos/warp.jl diff --git a/src/amos/AMOS.jl b/src/amos/AMOS.jl index 51fcdc55..b49f5f88 100644 --- a/src/amos/AMOS.jl +++ b/src/amos/AMOS.jl @@ -1,5 +1,6 @@ module AMOS include("const.jl") include("helper.jl") +include("warp.jl") end # AMOS diff --git a/src/amos/helper.jl b/src/amos/helper.jl index 76ded628..2d86775c 100644 --- a/src/amos/helper.jl +++ b/src/amos/helper.jl @@ -1,3 +1,33 @@ -function uchk() - true +# SPDX-License-Identifier: BSD-3-Clause OR MIT + +""" + uchk(y::ComplexF64, ascle::Float64, tol::Float64) + +Y ENTERS AS A SCALED QUANTITY WHOSE MAGNITUDE IS GREATER THAN +EXP(-ALIM)=ASCLE=1.0E+3*D1MACH(1)/TOL. THE TEST IS MADE TO SEE +IF THE MAGNITUDE OF THE REAL OR IMAGINARY PART WOULD UNDERFLOW +WHEN Y IS SCALED (BY TOL) TO ITS PROPER VALUE. Y IS ACCEPTED +IF THE UNDERFLOW IS AT LEAST ONE PRECISION BELOW THE MAGNITUDE +OF THE LARGEST COMPONENT; OTHERWISE THE PHASE ANGLE DOES NOT HAVE +ABSOLUTE ACCURACY AND AN UNDERFLOW IS ASSUMED. + +# Impl Ref +- [`openspecfun/amos/zuchk.f`](https://github.com/JuliaMath/openspecfun/blob/v0.5.6/amos/zuchk.f) +""" +function uchk(y::ComplexF64, ascle::Float64, tol::Float64) + yr = abs(real(y)) + yi = abs(imag(y)) + ss = max(yr, yi) + st = min(yr, yi) + + if st > ascle + return 0 + else + st /= tol + if ss < st + return 1 + else + return 0 + end + end end diff --git a/src/amos/warp.jl b/src/amos/warp.jl new file mode 100644 index 00000000..2f305d22 --- /dev/null +++ b/src/amos/warp.jl @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: MIT +# This file wraps the exported functions of `libopenspecfun` +# and is not part of AMOS or its C translation. +using OpenSpecFun_jll + +function _uchk(y::ComplexF64, ascle::Float64, tol::Float64) + nz = Ref{Int32}() + + ccall((:zuchk_, libopenspecfun), Cvoid, + (Ref{Float64}, Ref{Float64}, Ref{Int32}, + Ref{Float64}, Ref{Float64}), + real(y), imag(y), nz, + ascle, tol) + + nz[] +end From d14a0e935f3d7ac1523cb46b94fbf38cbeff59a2 Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Sat, 20 Jan 2024 12:13:40 -0600 Subject: [PATCH 05/34] test: AMOS.uchk --- test/amos/helper.jl | 81 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) diff --git a/test/amos/helper.jl b/test/amos/helper.jl index e860e2f3..e0dfe531 100644 --- a/test/amos/helper.jl +++ b/test/amos/helper.jl @@ -1,4 +1,83 @@ +"Special Float32" +const SPECIAL_FLOAT32 = Float64[ + 0.0, # +0.0 + 0x1p-149, # Min Subnormal Float + 0x1p-127, # mid Subnormal Float + 0x1.fffffcp-127, # Max Subnormal Float + 0x1p-126, # Min Normalized Float + 0x1p+1, # 2 + 0x1.fffffep+127, # Max Normalized Float + Inf, # Inf + reinterpret(Float32, 0x7f800001), # NaN (+min) + NaN, # NaN (+min) + reinterpret(Float32, 0x7fffffff), # NaN (+max) + + # eps( 10^0 ~ 10^16 ) + [ eps(10.0^i) for i in 0:16 ]..., +] # SPECIAL_FLOAT32 +# TOOD: add Float64 const + + +""" +input: + zarr[ (+) ] + +output: + zarr[ (+), (-) ] +""" +function gen_neg_inv(zarr::Vector{Float64}) + vcat( + zarr, + -1.0 * zarr, + inv.(zarr) + ) +end + +""" +input: + zarr[ (+,+) ] + +output: + zarr[ + (-,+), (+,+), + (-,-), (+,-), + ] +""" +function gen_phase4(zarr::Vector{ComplexF64}) + vcat( + 1.0im * zarr, + zarr + -1.0 * zarr, + -1.0im * zarr, + ) +end + + @testset "AMOS.uchk" begin - @test AMOS.uchk() + function tuchk(y::ComplexF64, ascle::Float64, tol::Float64) + @test AMOS.uchk(y,ascle,tol) == AMOS._uchk(y,ascle,tol) + end + + test_tol = [ + SPECIAL_FLOAT32..., + # 1e-0 ~ 1e-16 + [ 10.0^i for i in 0:-1:-16 ]..., + ] + test_ascle = [ + test_tol..., + ] + test_y = [ + complex(SPECIAL_FLOAT32)..., + # [0, 1)+i[0, 1) + rand(ComplexF64, 10)..., + # (1+im) * 1e0 ~ 1e16 + [ (1.0+1im)*10^i for i in 0:16 ]..., + ] + + for tol in gen_neg_inv(test_tol), + ascle in gen_neg_inv(test_ascle), + y in gen_phase4(test_y) + tuchk(y, ascle, tol) + end end From 789dafd433460806cfcc0c3da787daae37a2c06b Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Sat, 20 Jan 2024 12:16:33 -0600 Subject: [PATCH 06/34] test: gen more test cases --- test/amos/helper.jl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/test/amos/helper.jl b/test/amos/helper.jl index e0dfe531..caf27b85 100644 --- a/test/amos/helper.jl +++ b/test/amos/helper.jl @@ -24,13 +24,15 @@ input: zarr[ (+) ] output: - zarr[ (+), (-) ] + zarr[ + (+), (-), + inv.(+), inv.(-), + ] """ function gen_neg_inv(zarr::Vector{Float64}) vcat( - zarr, - -1.0 * zarr, - inv.(zarr) + zarr, -1.0*zarr, + inv.(zarr), -1.0*inv.(zarr), ) end From 2c0171ddac1ebda260fde2ec088b125a51cb671f Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Sat, 20 Jan 2024 12:21:59 -0600 Subject: [PATCH 07/34] amos: clean doc uchk --- src/amos/helper.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/amos/helper.jl b/src/amos/helper.jl index 2d86775c..90534967 100644 --- a/src/amos/helper.jl +++ b/src/amos/helper.jl @@ -4,12 +4,11 @@ uchk(y::ComplexF64, ascle::Float64, tol::Float64) Y ENTERS AS A SCALED QUANTITY WHOSE MAGNITUDE IS GREATER THAN -EXP(-ALIM)=ASCLE=1.0E+3*D1MACH(1)/TOL. THE TEST IS MADE TO SEE -IF THE MAGNITUDE OF THE REAL OR IMAGINARY PART WOULD UNDERFLOW -WHEN Y IS SCALED (BY TOL) TO ITS PROPER VALUE. Y IS ACCEPTED -IF THE UNDERFLOW IS AT LEAST ONE PRECISION BELOW THE MAGNITUDE -OF THE LARGEST COMPONENT; OTHERWISE THE PHASE ANGLE DOES NOT HAVE -ABSOLUTE ACCURACY AND AN UNDERFLOW IS ASSUMED. +`EXP(-ALIM) = ASCLE = 1.0E+3*D1MACH(1)/TOL`. +THE TEST IS MADE TO SEE IF THE MAGNITUDE OF THE REAL OR IMAGINARY PARTWOULD +UNDERFLOW WHEN Y IS SCALED (BY TOL) TO ITS PROPER VALUE. +Y IS ACCEPTED IF THE UNDERFLOW IS AT LEAST ONE PRECISION BELOW THE MAGNITUDE OF THE LARGEST COMPONENT; +OTHERWISE THE PHASE ANGLE DOES NOT HAVE ABSOLUTE ACCURACY AND AN UNDERFLOW IS ASSUMED. # Impl Ref - [`openspecfun/amos/zuchk.f`](https://github.com/JuliaMath/openspecfun/blob/v0.5.6/amos/zuchk.f) @@ -21,6 +20,7 @@ function uchk(y::ComplexF64, ascle::Float64, tol::Float64) st = min(yr, yi) if st > ascle + # TODO: ret ::Bool return 0 else st /= tol From 0aaa8f8bb4f0f156a7f05e0fffe93264f7699c73 Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Sat, 20 Jan 2024 12:24:32 -0600 Subject: [PATCH 08/34] amos: uchk clean doc, donot use full uppercase --- src/amos/helper.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/amos/helper.jl b/src/amos/helper.jl index 90534967..90a607bb 100644 --- a/src/amos/helper.jl +++ b/src/amos/helper.jl @@ -3,12 +3,12 @@ """ uchk(y::ComplexF64, ascle::Float64, tol::Float64) -Y ENTERS AS A SCALED QUANTITY WHOSE MAGNITUDE IS GREATER THAN -`EXP(-ALIM) = ASCLE = 1.0E+3*D1MACH(1)/TOL`. -THE TEST IS MADE TO SEE IF THE MAGNITUDE OF THE REAL OR IMAGINARY PARTWOULD -UNDERFLOW WHEN Y IS SCALED (BY TOL) TO ITS PROPER VALUE. -Y IS ACCEPTED IF THE UNDERFLOW IS AT LEAST ONE PRECISION BELOW THE MAGNITUDE OF THE LARGEST COMPONENT; -OTHERWISE THE PHASE ANGLE DOES NOT HAVE ABSOLUTE ACCURACY AND AN UNDERFLOW IS ASSUMED. +`y` enters as a scaled quantity whose magnitude is greater than +`Exp(-alim) = ascle = 1.0e+3*d1mach(1)/tol`. +The test is made to see if the magnitude of the real or imaginary part would +Underflow when `y` is scaled (by `tol`) to its proper value. +`y` is accepted if the underflow is at least one precision below the magnitude of the largest component; +Otherwise the phase angle does not have absolute accuracy and an underflow is assumed. # Impl Ref - [`openspecfun/amos/zuchk.f`](https://github.com/JuliaMath/openspecfun/blob/v0.5.6/amos/zuchk.f) From ca9babb39bfc8bbdfd701c4d1fa6afd1b897a59f Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Sat, 20 Jan 2024 13:33:17 -0600 Subject: [PATCH 09/34] test: amos const --- test/amos/const.jl | 11 +++++++++++ test/runtests.jl | 3 ++- 2 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 test/amos/const.jl diff --git a/test/amos/const.jl b/test/amos/const.jl new file mode 100644 index 00000000..b5077510 --- /dev/null +++ b/test/amos/const.jl @@ -0,0 +1,11 @@ + +@testset "AMOS.I1_MACH,D1_MACH" begin + # I1_MACH + @test AMOS.I1_MACH[6] == sizeof(Int32) + @test AMOS.I1_MACH[9] == typemax(Int32) + + # D1_MACH + @test AMOS.D1_MACH[3] == eps(Float64)/2 + @test AMOS.D1_MACH[4] == eps(Float64) + @test AMOS.D1_MACH[5] == log10(2) +end diff --git a/test/runtests.jl b/test/runtests.jl index 7872f8f4..11691c77 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -25,7 +25,8 @@ checktol(err::Float64) = err ≤ 1e-13 tests = [ # AMOS test - "amos/helper", + "amos/const", + "amos/helper", # "bessel", "beta_inc", From 4cfed755baa8b66c2b27624936dbf4189544c87a Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Sat, 20 Jan 2024 13:37:42 -0600 Subject: [PATCH 10/34] test: AMOS.GAMMALN_GLN --- test/amos/const.jl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/amos/const.jl b/test/amos/const.jl index b5077510..10831e0e 100644 --- a/test/amos/const.jl +++ b/test/amos/const.jl @@ -9,3 +9,9 @@ @test AMOS.D1_MACH[4] == eps(Float64) @test AMOS.D1_MACH[5] == log10(2) end + +@testset "AMOS.GAMMALN_GLN" begin + for idx in 1:100 + @test AMOS.GAMMALN_GLN[idx] == log(gamma(idx)) + end +end From 43f6353f82644c57747a23af15062420a5fa6753 Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Sat, 20 Jan 2024 13:41:41 -0600 Subject: [PATCH 11/34] test: amos const size --- test/amos/const.jl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/amos/const.jl b/test/amos/const.jl index 10831e0e..bf9c1eef 100644 --- a/test/amos/const.jl +++ b/test/amos/const.jl @@ -1,17 +1,23 @@ @testset "AMOS.I1_MACH,D1_MACH" begin # I1_MACH + @test size(AMOS.I1_MACH) == (16,) @test AMOS.I1_MACH[6] == sizeof(Int32) @test AMOS.I1_MACH[9] == typemax(Int32) # D1_MACH + @test size(AMOS.D1_MACH) == (5,) @test AMOS.D1_MACH[3] == eps(Float64)/2 @test AMOS.D1_MACH[4] == eps(Float64) @test AMOS.D1_MACH[5] == log10(2) end @testset "AMOS.GAMMALN_GLN" begin + @test size(AMOS.GAMMALN_GLN) == (100,) for idx in 1:100 @test AMOS.GAMMALN_GLN[idx] == log(gamma(idx)) end + + @test size(AMOS.GAMMALN_CF) == (22,) + # TODO: test AMOS.GAMMALN_CF end From 8d26f183384200c6f57ffa298eb2fbbef6fd142a Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Sat, 20 Jan 2024 13:41:53 -0600 Subject: [PATCH 12/34] amos: add GAMMALN_GLN , GAMMALN_CF --- src/amos/const.jl | 56 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/src/amos/const.jl b/src/amos/const.jl index 402ff5fa..1b89eb2d 100644 --- a/src/amos/const.jl +++ b/src/amos/const.jl @@ -105,3 +105,59 @@ const D1_MACH = Float64[ # [5] log10(2) 0.3010299956639812, ] # D1_MACH + + +""" + GAMMALN_GLN :: Vector{Float64} + +LNGAMMA(N), N=1,100 + +# Impl Ref +- [`openspecfun/amos/dgamln.f`](https://github.com/JuliaMath/openspecfun/blob/v0.5.6/amos/dgamln.f) +- [`scipy/scipy/special/_amos.c:dgamln_cf[22]`](https://github.com/scipy/scipy/blob/b882f1b7ebe55e534f29a8d68a54e4ecd30aeb1a/scipy/special/_amos.c#L345-L371) +""" +const GAMMALN_GLN = Float64[ + 0.00000000000000000e+00, 0.00000000000000000e+00, 6.93147180559945309e-01, 1.79175946922805500e+00, # 0 + 3.17805383034794562e+00, 4.78749174278204599e+00, 6.57925121201010100e+00, 8.52516136106541430e+00, # 4 + 1.06046029027452502e+01, 1.28018274800814696e+01, 1.51044125730755153e+01, 1.75023078458738858e+01, # 8 + 1.99872144956618861e+01, 2.25521638531234229e+01, 2.51912211827386815e+01, 2.78992713838408916e+01, # 12 + 3.06718601060806728e+01, 3.35050734501368889e+01, 3.63954452080330536e+01, 3.93398841871994940e+01, # 16 + 4.23356164607534850e+01, 4.53801388984769080e+01, 4.84711813518352239e+01, 5.16066755677643736e+01, # 20 + 5.47847293981123192e+01, 5.80036052229805199e+01, 6.12617017610020020e+01, 6.45575386270063311e+01, # 24 + 6.78897431371815350e+01, 7.12570389671680090e+01, 7.46582363488301644e+01, 7.80922235533153106e+01, # 28 + 8.15579594561150372e+01, 8.50544670175815174e+01, 8.85808275421976788e+01, 9.21361756036870925e+01, # 32 + 9.57196945421432025e+01, 9.93306124547874269e+01, 1.02968198614513813e+02, 1.06631760260643459e+02, # 36 + 1.10320639714757395e+02, 1.14034211781461703e+02, 1.17771881399745072e+02, 1.21533081515438634e+02, # 40 + 1.25317271149356895e+02, 1.29123933639127215e+02, 1.32952575035616310e+02, 1.36802722637326368e+02, # 44 + 1.40673923648234259e+02, 1.44565743946344886e+02, 1.48477766951773032e+02, 1.52409592584497358e+02, # 48 + 1.56360836303078785e+02, 1.60331128216630907e+02, 1.64320112263195181e+02, 1.68327445448427652e+02, # 52 + 1.72352797139162802e+02, 1.76395848406997352e+02, 1.80456291417543771e+02, 1.84533828861449491e+02, # 56 + 1.88628173423671591e+02, 1.92739047287844902e+02, 1.96866181672889994e+02, 2.01009316399281527e+02, # 60 + 2.05168199482641199e+02, 2.09342586752536836e+02, 2.13532241494563261e+02, 2.17736934113954227e+02, # 64 + 2.21956441819130334e+02, 2.26190548323727593e+02, 2.30439043565776952e+02, 2.34701723442818268e+02, # 68 + 2.38978389561834323e+02, 2.43268849002982714e+02, 2.47572914096186884e+02, 2.51890402209723194e+02, # 72 + 2.56221135550009525e+02, 2.60564940971863209e+02, 2.64921649798552801e+02, 2.69291097651019823e+02, # 76 + 2.73673124285693704e+02, 2.78067573440366143e+02, 2.82474292687630396e+02, 2.86893133295426994e+02, # 80 + 2.91323950094270308e+02, 2.95766601350760624e+02, 3.00220948647014132e+02, 3.04686856765668715e+02, # 84 + 3.09164193580146922e+02, 3.13652829949879062e+02, 3.18152639620209327e+02, 3.22663499126726177e+02, # 88 + 3.27185287703775217e+02, 3.31717887196928473e+02, 3.36261181979198477e+02, 3.40815058870799018e+02, # 92 + 3.45379407062266854e+02, 3.49954118040770237e+02, 3.54539085519440809e+02, 3.59134205369575399e+02, # 96 +] # GAMMALN_GLN + +""" + GAMMALN_CF :: Vector{Float64} + +COEFFICIENTS OF ASYMPTOTIC EXPANSION [22] + +# Impl Ref +- [`openspecfun/amos/dgamln.f`](https://github.com/JuliaMath/openspecfun/blob/v0.5.6/amos/dgamln.f) +- [`scipy/scipy/special/_amos.c:dgamln_cf[22]`](https://github.com/scipy/scipy/blob/b882f1b7ebe55e534f29a8d68a54e4ecd30aeb1a/scipy/special/_amos.c#L373-L380) +""" +const GAMMALN_CF = Float64[ + 8.33333333333333333e-02, -2.77777777777777778e-03, 7.93650793650793651e-04, -5.95238095238095238e-04, # 0 + 8.41750841750841751e-04, -1.91752691752691753e-03, 6.41025641025641026e-03, -2.95506535947712418e-02, # 4 + 1.79644372368830573e-01, -1.39243221690590112e+00, 1.34028640441683920e+01, -1.56848284626002017e+02, # 8 + 2.19310333333333333e+03, -3.61087712537249894e+04, 6.91472268851313067e+05, -1.52382215394074162e+07, # 12 + 3.82900751391414141e+08, -1.08822660357843911e+10, 3.47320283765002252e+11, -1.23696021422692745e+13, # 16 + 4.88788064793079335e+14, -2.13203339609193739e+16 +] # GAMMALN_CF From 78ca2aba6b79dabba4d04ba109633ad647486f12 Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Sat, 20 Jan 2024 15:25:48 -0600 Subject: [PATCH 13/34] amos: impl and test AMOS.gammaln --- src/amos/helper.jl | 91 ++++++++++++++++++++++++++++++++++++++++++++- src/amos/warp.jl | 17 ++++++++- test/amos/helper.jl | 61 ++++++++++++++++++++++++++++-- 3 files changed, 162 insertions(+), 7 deletions(-) diff --git a/src/amos/helper.jl b/src/amos/helper.jl index 90a607bb..05c01669 100644 --- a/src/amos/helper.jl +++ b/src/amos/helper.jl @@ -4,7 +4,7 @@ uchk(y::ComplexF64, ascle::Float64, tol::Float64) `y` enters as a scaled quantity whose magnitude is greater than -`Exp(-alim) = ascle = 1.0e+3*d1mach(1)/tol`. +`Exp(-alim) = ascle = 1.0e+3*D1_MACH(1)/tol`. The test is made to see if the magnitude of the real or imaginary part would Underflow when `y` is scaled (by `tol`) to its proper value. `y` is accepted if the underflow is at least one precision below the magnitude of the largest component; @@ -18,7 +18,7 @@ function uchk(y::ComplexF64, ascle::Float64, tol::Float64) yi = abs(imag(y)) ss = max(yr, yi) st = min(yr, yi) - + if st > ascle # TODO: ret ::Bool return 0 @@ -31,3 +31,90 @@ function uchk(y::ComplexF64, ascle::Float64, tol::Float64) end end end + +""" + gammaln(z::Float64) + +Compute the logarithm of the gamma function. + +# fortran comments +DGAMLN COMPUTES THE NATURAL LOG OF THE GAMMA FUNCTION FOR +Z.GT.0. THE ASYMPTOTIC EXPANSION IS USED TO GENERATE VALUES +GREATER THAN ZMIN WHICH ARE ADJUSTED BY THE RECURSION +G(Z+1)=Z*G(Z) FOR Z.LE.ZMIN. THE FUNCTION WAS MADE AS +PORTABLE AS POSSIBLE BY COMPUTIMG ZMIN FROM THE NUMBER OF BASE +10 DIGITS IN A WORD, RLN=AMAX1(-ALOG10(R1MACH(4)),0.5E-18) +LIMITED TO 18 DIGITS OF (RELATIVE) ACCURACY. + +SINCE INTEGER ARGUMENTS ARE COMMON, A TABLE LOOK UP ON 100 +VALUES IS USED FOR SPEED OF EXECUTION. + +# Impl Ref +- [`openspecfun/amos/dgamln.f`](https://github.com/JuliaMath/openspecfun/blob/v0.5.6/amos/dgamln.f) +""" +function gammaln(z::Float64) + con::Float64 = 1.83787706640934548 # ln(2*pi) + @assert con == log(big"2" * pi) |> Float64 + + # TODO: handle z <= 0 || isnan(z) first + if z > 0.0 + if z <= 101.0 + nz = Int(trunc(z)) + fz::Float64 = z - nz + if fz <= 0.0 + if nz <= 100 + return GAMMALN_GLN[nz] + end + end + end #= 10 =# + + wdtol = max(D1_MACH[3+1], 1e-18) + i1m = I1_MACH[13+1] + rln = D1_MACH[4+1] * i1m + fln = max(min(rln, 20.0), 3.0) - 3.0 + zm = 1.8 + 0.3875 * fln + mz = trunc(zm) + 1 + zmin = mz + zdmy = z + zinc = 0.0 + if z < zmin + zinc = zmin - nz + zdmy = z + zinc + end #= 20 =# + + zp = 1.0 / zdmy + t1 = GAMMALN_CF[0+1] * zp + s = t1 + if zp >= wdtol + zsq = zp * zp + tst = t1 * wdtol + # `i=1` is outside `if` + for i = 2:22 + zp *= zsq + trm = GAMMALN_CF[i] * zp + if abs(trm) < tst + break # Goto 40 + end + + s += trm + end #= 30 =# + end #= 40 =# + + if 0.0 == zinc + tlg = log(z) + return z * (tlg - 1.0) + 0.5 * (con - tlg) + s + end #= 50 =# + + zp = 1.0 + nz = trunc(zinc) + for i = 0:(nz-1) + zp *= z + i + end #= 60 =# + + tlg = log(zdmy) + return zdmy * (tlg - 1.0) - log(zp) + 0.5 * (con - tlg) + s + end #= 70 =# + + # Zero or negative argument + return NaN +end diff --git a/src/amos/warp.jl b/src/amos/warp.jl index 2f305d22..e6f90d2f 100644 --- a/src/amos/warp.jl +++ b/src/amos/warp.jl @@ -7,10 +7,25 @@ function _uchk(y::ComplexF64, ascle::Float64, tol::Float64) nz = Ref{Int32}() ccall((:zuchk_, libopenspecfun), Cvoid, - (Ref{Float64}, Ref{Float64}, Ref{Int32}, + (Ref{Float64}, Ref{Float64}, Ref{Int32}, Ref{Float64}, Ref{Float64}), real(y), imag(y), nz, ascle, tol) nz[] end + +function _gammaln(z::Float64) + isnan(z) && return NaN + + ierr = Ref{Int32}() + gamln = ccall((:dgamln_, libopenspecfun), Cdouble, + (Ref{Float64}, Ref{Int32}), + z, ierr) + + if 0==ierr[] + gamln + else + NaN + end +end diff --git a/test/amos/helper.jl b/test/amos/helper.jl index caf27b85..ab22d57f 100644 --- a/test/amos/helper.jl +++ b/test/amos/helper.jl @@ -20,12 +20,12 @@ const SPECIAL_FLOAT32 = Float64[ """ -input: +input: zarr[ (+) ] output: - zarr[ - (+), (-), + zarr[ + (+), (-), inv.(+), inv.(-), ] """ @@ -37,7 +37,7 @@ function gen_neg_inv(zarr::Vector{Float64}) end """ -input: +input: zarr[ (+,+) ] output: @@ -83,3 +83,56 @@ end tuchk(y, ascle, tol) end end + +"Pure julia log(gamma(z))" +function gammalog(z::Float64) + if z > 0.0 + gam = gamma(z) + if gam > 0.0 + return log(gam) + end + end + + return NaN +end + +@testset "AMOS.gammaln" begin + test_y = [ + SPECIAL_FLOAT32..., + # [0, 1) + rand(Float64, 10)..., + # 1e-0 ~ 1e-16 + [ 10.0^i for i in 0:-1:-16 ]..., + -1.0, -10., -Inf, + NaN, + ] + + for y in (test_y) + ref_jl = gammalog(y) # pure julia impl, ground truth + ref = AMOS._gammaln(y) # call AMOS, baseline + res = AMOS.gammaln(y) # fortran translation + + if isnan(ref) + @test isnan(res) + if isinf(ref_jl) + # broken on 0x1.fffffep+127 + @test_broken isnan(ref_jl) + else + @test isnan(ref_jl) + end + else + @test ref ≈ res + @test ref == res + + if isinf(ref_jl) + # broken on 0x1.fffffcp-127 + @test_broken ref_jl ≈ res + else + @test ref_jl ≈ res + end + end + end + + @test_broken AMOS.gammaln(0x1.fffffep+127) == Inf + @test_broken AMOS.gammaln(0x1.fffffcp-127) == gammalog(0x1.fffffcp-127) +end From 535d80681daf5e9209e53afe35bb7b544f42a55d Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Sat, 20 Jan 2024 15:26:03 -0600 Subject: [PATCH 14/34] amos: rm trailing space --- src/amos/LICENSE.md | 12 ++++++------ src/amos/const.jl | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/amos/LICENSE.md b/src/amos/LICENSE.md index 2952d1b8..1a687aa1 100644 --- a/src/amos/LICENSE.md +++ b/src/amos/LICENSE.md @@ -12,7 +12,7 @@ See below for the license. * * A Portable Package for Bessel Functions of a Complex Argument * and Nonnegative Order - * + * * This algorithm is a package of subroutines for computing Bessel * functions and Airy functions. The routines are updated * versions of those routines found in TOMS algorithm 644. @@ -43,25 +43,25 @@ See below for the license. * The original Fortran code can be found at https://www.netlib.org/amos/ * * References: - * + * * [1]: Abramowitz, M. and Stegun, I. A., Handbook of Mathematical * Functions, NBS Applied Math Series 55, U.S. Dept. of Commerce, * Washington, D.C., 1955 - * + * * [2]: Amos, D. E., Algorithm 644, A Portable Package For Bessel * Functions of a Complex Argument and Nonnegative Order, ACM * Transactions on Mathematical Software, Vol. 12, No. 3, * September 1986, Pages 265-273, DOI:10.1145/7921.214331 - * + * * [3]: Amos, D. E., Remark on Algorithm 644, ACM Transactions on * Mathematical Software, Vol. 16, No. 4, December 1990, Page * 404, DOI:10.1145/98267.98299 - * + * * [4]: Amos, D. E., A remark on Algorithm 644: "A portable package * for Bessel functions of a complex argument and nonnegative order", * ACM Transactions on Mathematical Software, Vol. 21, No. 4, * December 1995, Pages 388-393, DOI:10.1145/212066.212078 - * + * * [5]: Cody, W. J., Algorithm 665, MACHAR: A Subroutine to * Dynamically Determine Machine Parameters, ACM Transactions on * Mathematical Software, Vol. 14, No. 4, December 1988, Pages diff --git a/src/amos/const.jl b/src/amos/const.jl index 1b89eb2d..64082928 100644 --- a/src/amos/const.jl +++ b/src/amos/const.jl @@ -77,7 +77,7 @@ for the local machine environment. # fortran comments ```fortran -C D1MACH( 1) = B**(EMIN-1), +C D1MACH( 1) = B**(EMIN-1), C D1MACH( 2) = B**EMAX*(1 - B**(-T)), the largest magnitude. C D1MACH( 3) = B**(-T), the smallest relative spacing. C D1MACH( 4) = B**(1-T), the largest relative spacing. @@ -154,10 +154,10 @@ COEFFICIENTS OF ASYMPTOTIC EXPANSION [22] - [`scipy/scipy/special/_amos.c:dgamln_cf[22]`](https://github.com/scipy/scipy/blob/b882f1b7ebe55e534f29a8d68a54e4ecd30aeb1a/scipy/special/_amos.c#L373-L380) """ const GAMMALN_CF = Float64[ - 8.33333333333333333e-02, -2.77777777777777778e-03, 7.93650793650793651e-04, -5.95238095238095238e-04, # 0 + 8.33333333333333333e-02, -2.77777777777777778e-03, 7.93650793650793651e-04, -5.95238095238095238e-04, # 0 8.41750841750841751e-04, -1.91752691752691753e-03, 6.41025641025641026e-03, -2.95506535947712418e-02, # 4 1.79644372368830573e-01, -1.39243221690590112e+00, 1.34028640441683920e+01, -1.56848284626002017e+02, # 8 2.19310333333333333e+03, -3.61087712537249894e+04, 6.91472268851313067e+05, -1.52382215394074162e+07, # 12 3.82900751391414141e+08, -1.08822660357843911e+10, 3.47320283765002252e+11, -1.23696021422692745e+13, # 16 - 4.88788064793079335e+14, -2.13203339609193739e+16 + 4.88788064793079335e+14, -2.13203339609193739e+16 ] # GAMMALN_CF From 48fd26e8f4a7cdc23d2d3414fd5abb868cbb5b41 Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Sat, 20 Jan 2024 15:32:35 -0600 Subject: [PATCH 15/34] amos: gammaln add note --- src/amos/helper.jl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/amos/helper.jl b/src/amos/helper.jl index 05c01669..bb0ad58b 100644 --- a/src/amos/helper.jl +++ b/src/amos/helper.jl @@ -88,7 +88,7 @@ function gammaln(z::Float64) if zp >= wdtol zsq = zp * zp tst = t1 * wdtol - # `i=1` is outside `if` + # NOTE: `i=1` is outside `if` for i = 2:22 zp *= zsq trm = GAMMALN_CF[i] * zp @@ -107,8 +107,9 @@ function gammaln(z::Float64) zp = 1.0 nz = trunc(zinc) - for i = 0:(nz-1) - zp *= z + i + for i = 1:nz + # NOTE: Parentheses are required to avoid overflow + zp *= z + (i - 1) end #= 60 =# tlg = log(zdmy) From 9a7e747bd724234f9bd339b5682be7f12529039c Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Sat, 20 Jan 2024 15:36:47 -0600 Subject: [PATCH 16/34] amos\helper: add impl ref link --- src/amos/helper.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/amos/helper.jl b/src/amos/helper.jl index bb0ad58b..ffaa53a3 100644 --- a/src/amos/helper.jl +++ b/src/amos/helper.jl @@ -12,6 +12,7 @@ Otherwise the phase angle does not have absolute accuracy and an underflow is as # Impl Ref - [`openspecfun/amos/zuchk.f`](https://github.com/JuliaMath/openspecfun/blob/v0.5.6/amos/zuchk.f) +- [`scipy/scipy/special/_amos.c:amos_uchk`](https://github.com/scipy/scipy/blob/b882f1b7ebe55e534f29a8d68a54e4ecd30aeb1a/scipy/special/_amos.c#L4405-L4439) """ function uchk(y::ComplexF64, ascle::Float64, tol::Float64) yr = abs(real(y)) @@ -51,6 +52,7 @@ VALUES IS USED FOR SPEED OF EXECUTION. # Impl Ref - [`openspecfun/amos/dgamln.f`](https://github.com/JuliaMath/openspecfun/blob/v0.5.6/amos/dgamln.f) +- [`scipy/scipy/special/_amos.c:amos_gamln`](https://github.com/scipy/scipy/blob/b882f1b7ebe55e534f29a8d68a54e4ecd30aeb1a/scipy/special/_amos.c#L3678C8-L3775) """ function gammaln(z::Float64) con::Float64 = 1.83787706640934548 # ln(2*pi) From 87d744bd705e91ec53a68b5d3b36efa6668d9780 Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Sat, 20 Jan 2024 23:58:44 -0600 Subject: [PATCH 17/34] amos: impl `AMOS.kscl!` --- src/amos/helper.jl | 148 ++++++++++++++++++++++++++++++++++++++++++++ src/amos/warp.jl | 51 +++++++++++++++ test/amos/helper.jl | 92 +++++++++++++++++++++++++++ 3 files changed, 291 insertions(+) diff --git a/src/amos/helper.jl b/src/amos/helper.jl index ffaa53a3..e1937501 100644 --- a/src/amos/helper.jl +++ b/src/amos/helper.jl @@ -121,3 +121,151 @@ function gammaln(z::Float64) # Zero or negative argument return NaN end + + +""" + kscl!( + y::Vector{ComplexF64}, + zr::ComplexF64, + fnu::Float64, + n::Float64, + rz::ComplexF64, + ascle::Float64, + tol::Float64, + elim::Float64, + ) + +# fortran comments +SET K FUNCTIONS TO ZERO ON UNDERFLOW, CONTINUE RECURRENCE +ON SCALED FUNCTIONS UNTIL TWO MEMBERS COME ON SCALE, THEN +RETURN WITH MIN(NZ+2,N) VALUES SCALED BY 1/TOL. + +# Impl Ref +- [`openspecfun/amos/zkscl.f`](https://github.com/JuliaMath/openspecfun/blob/v0.5.6/amos/zkscl.f) +- [`scipy/scipy/special/_amos.c:amos_kscl`](https://github.com/scipy/scipy/blob/b882f1b7ebe55e534f29a8d68a54e4ecd30aeb1a/scipy/special/_amos.c#L3949-L4060) +""" +function kscl!( + y::Vector{ComplexF64}, + zr::ComplexF64, + fnu::Float64, + n::Int, + rz::ComplexF64, + ascle::Float64, + tol::Float64, + elim::Float64, +) + cy = ComplexF64[0.0im, 0.0im] + nz::Int = 0 + ic::Int = 0 + nn::Int = ( n > 2 ? 2 : n ) + @assert length(y) >= nn + + kk::Int = 0 + elm = exp(-elim) + zdr = real(zr) + + for i = 1:nn + s1 = y[i] + cy[i] = s1 + as = abs(s1) + acs = -real(zr) + log(as) + nz += 1 + y[i] = 0.0im + if acs < -elim + continue + end + cs = log(s1) + cs -= zr + str = exp(real(cs)) / tol + cs = str * complex(cos(imag(cs)), sin(imag(cs))) + if 0==uchk(cs, ascle, tol) + y[i] = cs + nz -= 1 + ic = i + end + end #= 10 =# + + if n == 1 + # @info "ret n==1: " nz + return nz + end + + if ic <= 1 + @assert n >= 1 + # TODO: may throw BoundsError here + y[1] = 0.0im + nz = 2 + end #= 20 =# + if n == 2 + # @info "ret n==2: " nz + return nz + end + if nz == 0 + # @info "ret nz==0: " n + return nz + end + + fn = fnu + 1.0 + ck = fn * rz + s1 = cy[1] + s2 = cy[2] + zri = imag(zr) + zd = zr + +# FIND TWO CONSECUTIVE Y VALUES ON SCALE. SCALE RECURRENCE IF +# S2 GETS LARGER THAN EXP(ELIM/2) + for i = 3:n + kk = i + cs = s2 + s2 *= ck + s2 += s1 + s1 = cs + ck += rz + as = abs(s2) + alas = log(as) + acs = alas - zdr + nz += 1 + y[i] = 0.0im + if acs >= -elim + cs = log(s2) + cs -= zd + str = exp(real(cs)) / tol + cs = str * complex(cos(imag(cs)), sin(imag(cs))) + if 0==uchk(cs, ascle, tol) + y[i] = cs + nz -= 1 + if ic == (kk - 1) + #= 40 =# + nz = kk - 2 + #= 45 =# + for i in 1:nz + y[i] = 0.0im + end + # @info "ret i==(kk-1): " n nz + return nz + end + + ic = kk + continue + end + end #= 25 =# + + if alas >= (0.5 * elim) + zdr -= elim + zd = complex(zdr, zri) + s1 *= elm + s2 *= elm + end + end #= 30 =# + + nz = n + if ic == n + nz = n - 1 + end #= 45 =# + + for i in 1:nz + y[i] = 0.0im + end + # @info "ret end kscl!: " n nz + return nz +end diff --git a/src/amos/warp.jl b/src/amos/warp.jl index e6f90d2f..0f1c8fa6 100644 --- a/src/amos/warp.jl +++ b/src/amos/warp.jl @@ -29,3 +29,54 @@ function _gammaln(z::Float64) NaN end end + +function update_arr!(y::Vector{ComplexF64}, y_new::Vector{ComplexF64}) + @assert size(y) == size(y_new) + for idx in 1:length(y) + y[idx] = y_new[idx] + end +end + +function _kscl!( + y::Vector{ComplexF64}, + zr::ComplexF64, + fnu::Float64, + n::Int, + rz::ComplexF64, + ascle::Float64, + tol::Float64, + elim::Float64, +) + y_len = length(y) + nz = Ref{Int32}() + yr = real.(y) + yi = imag.(y) + # @info "[_kscl!] before call" yr, yi + # @show yr + # @show yi + + ccall((:zkscl_, libopenspecfun), Cvoid, + # ZRR,ZRI,FNU,N, + (Ref{Float64}, Ref{Float64}, Ref{Float64}, Ref{Int32}, + # YR,YI,NZ, + Ptr{Float64}, Ptr{Float64}, Ref{Int32}, + # RZR,RZI, + Ref{Float64}, Ref{Float64}, + # ASCLE,TOL,ELIM + Ref{Float64}, Ref{Float64}, Ref{Float64}, + ), + real(zr), imag(zr), fnu, Int32(n), + yr, yi, nz, + real(rz), imag(rz), + ascle, tol, elim) + # @info "[_kscl!] after call" yr, yi + # @show yr + # @show yi + + # Update `y` + y_new = complex.(yr, yi) + update_arr!(y, y_new) + @assert length(y) == y_len + + nz[] +end diff --git a/test/amos/helper.jl b/test/amos/helper.jl index ab22d57f..48c93335 100644 --- a/test/amos/helper.jl +++ b/test/amos/helper.jl @@ -54,8 +54,22 @@ function gen_phase4(zarr::Vector{ComplexF64}) -1.0im * zarr, ) end +function gen_phase4(zvv::Vector{Vector{ComplexF64}}) + gen_phase4.(zvv) +end + +"""test if ComplexF64 contains NaN or Inf in real or imag parts. +""" +function contains_inf_nan(z::Vector{ComplexF64}) + z |> real .|> isinf |> any || + z |> imag .|> isinf |> any || + z |> real .|> isnan |> any || + z |> imag .|> isnan |> any +end +# ==== Test Set ====# + @testset "AMOS.uchk" begin function tuchk(y::ComplexF64, ascle::Float64, tol::Float64) @test AMOS.uchk(y,ascle,tol) == AMOS._uchk(y,ascle,tol) @@ -136,3 +150,81 @@ end @test_broken AMOS.gammaln(0x1.fffffep+127) == Inf @test_broken AMOS.gammaln(0x1.fffffcp-127) == gammalog(0x1.fffffcp-127) end + +@testset "AMOS.kscl!" begin + function tkscl!( + y::Vector{ComplexF64}, + zr::ComplexF64, + fnu::Float64=2.0, + n::Int=2, + rz::ComplexF64=0.5+0.5*im, + ascle::Float64=1e-30, + tol::Float64=1e-15, + elim::Float64=20., + ) + # @info "input" y zr fnu n rz ascle tol elim + y_ref = copy(y) + y_res = copy(y) + ref = AMOS._kscl!(y_ref,zr,fnu,n,rz,ascle,tol,elim) + res = AMOS.kscl!(y_res,zr,fnu,n,rz,ascle,tol,elim) + + @test ref == res + if ref != res + @info "ref != res" ref res + @info "params" zr fnu n rz ascle elim + @show y y_ref y_res + end + + if contains_inf_nan(y_ref) + # use === to test NaN/Inf + @test all(y_ref .=== y_res) + else + @test isapprox(y_ref, y_res) + # @info "isapprox" ref zr y y_ref y_res + # @show y_ref + # @show y_res + if !isapprox(y_ref, y_res) + @info "!isapprox" ref zr y y_ref y_res + @info "params" zr fnu n rz ascle elim + @show y y_ref y_res + end + end + end + + test_y = [ + # ComplexF64[], # TODO: n==0 + # n=1 + ComplexF64[0.0], + ComplexF64[1.0], + ComplexF64[pi], + [ rand(ComplexF64, 1) for _ in 1:10 ]..., + # n=2 + ComplexF64[0.0, 0.0], + ComplexF64[1., 1.], + ComplexF64[1., 0.], + ComplexF64[0., 1.], + ComplexF64[1., 2.], + [ rand(ComplexF64, 2) for _ in 1:10 ]..., + # n=5 + ComplexF64[ complex(i,i) for i in 1:5 ], + ] + test_zr = [ + SPECIAL_FLOAT32..., + # [0, 1) + rand(Float64, 10)..., + # 1e-0 ~ 1e-16 + [ 10.0^i for i in 0:-1:-16 ]..., + 1.0, + pi, ℯ, + ] + + for y in gen_phase4(test_y), + zr in gen_neg_inv(test_zr) + # TODO: @test_broken + isnan(zr) && continue + isinf(zr) && continue + 0x1.fffffep+127==zr && continue + + tkscl!(y, complex(zr), 2.0, length(y)) + end +end From 97252f91aa864d05de2ee700e3570f86e72203ae Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Sat, 20 Jan 2024 23:59:54 -0600 Subject: [PATCH 18/34] amos: kscl! update comments --- src/amos/helper.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/amos/helper.jl b/src/amos/helper.jl index e1937501..024d9792 100644 --- a/src/amos/helper.jl +++ b/src/amos/helper.jl @@ -136,9 +136,9 @@ end ) # fortran comments -SET K FUNCTIONS TO ZERO ON UNDERFLOW, CONTINUE RECURRENCE -ON SCALED FUNCTIONS UNTIL TWO MEMBERS COME ON SCALE, THEN -RETURN WITH MIN(NZ+2,N) VALUES SCALED BY 1/TOL. +Set k functions to zero on underflow, +continue recurrence on scaled functions until two members come on scale, +then return with `min(nz+2,n)` values scaled by `1/tol`. # Impl Ref - [`openspecfun/amos/zkscl.f`](https://github.com/JuliaMath/openspecfun/blob/v0.5.6/amos/zkscl.f) From 48042645973e6775a0448599641a6d47ee52424a Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Sun, 21 Jan 2024 15:20:01 -0600 Subject: [PATCH 19/34] amos: impl `AMOS.s1s2!` --- src/amos/helper.jl | 68 +++++++++++++++++++++++++++++++++++++++++++++ src/amos/warp.jl | 37 ++++++++++++++++++++++++ test/amos/helper.jl | 63 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 168 insertions(+) diff --git a/src/amos/helper.jl b/src/amos/helper.jl index 024d9792..b704a5d9 100644 --- a/src/amos/helper.jl +++ b/src/amos/helper.jl @@ -269,3 +269,71 @@ function kscl!( # @info "ret end kscl!: " n nz return nz end + +""" + s1s2!( + zr::ComplexF64, + s1_ref::Ref{ComplexF64}, + s2_ref::Ref{ComplexF64}, + ascle::Float64, + alim::Float64, + iuf::Int + ) + +# fortran comments +Zs1s2 tests for a possible underflow resulting from the +addition of the I and k functions in the analytic con- +tinuation formula where s1=k function and s2=i function. +On kode=1 the I and k functions are different orders of +magnitude, but for kode=2 they can be of the same order +of magnitude and the maximum must be at least one +precision above the underflow limit. + +# Impl Ref +- [`openspecfun/amos/zs1s2.f`](https://github.com/JuliaMath/openspecfun/blob/v0.5.6/amos/zs1s2.f) +- [`scipy/scipy/special/_amos.c:amos_s1s2`](https://github.com/scipy/scipy/blob/b882f1b7ebe55e534f29a8d68a54e4ecd30aeb1a/scipy/special/_amos.c#L4348-L4402) +""" +function s1s2!( + zr::ComplexF64, + s1_ref::Ref{ComplexF64}, + s2_ref::Ref{ComplexF64}, + ascle::Float64, + alim::Float64, + iuf::Int +) + s1 = s1_ref[] + s2 = s2_ref[] + + nz::Int = 0 + as1 = abs(s1) + as2 = abs(s2) + + if (real(s1) != 0.0 || imag(s1) != 0.0) + if as1 != 0.0 + xx = real(zr) + aln = -xx - xx + log(as1) + s1d = s1 + s1_ref[] = 0.0 + as1 = 0.0 + if aln >= -alim + c1 = log(s1d) + c1 -= zr + c1 -= zr + s1_ref[] = exp(c1) + as1 = abs(s1_ref[]) + iuf += 1 + end + end + end #= 10 =# + + aa = max(as1, as2) + if aa > ascle + return nz, iuf + end + + s1_ref[] = 0.0 + s2_ref[] = 0.0 + nz = 1 + iuf = 0 + return nz, iuf +end diff --git a/src/amos/warp.jl b/src/amos/warp.jl index 0f1c8fa6..f7681e69 100644 --- a/src/amos/warp.jl +++ b/src/amos/warp.jl @@ -80,3 +80,40 @@ function _kscl!( nz[] end + +""" +`_s1s2!` will modify `s1_ref` and `s2_ref`. + +return (nz, iuf) +""" +function _s1s2!( + zr::ComplexF64, + s1_ref::Ref{ComplexF64}, + s2_ref::Ref{ComplexF64}, + ascle::Float64, + alim::Float64, + iuf::Int +) + s1r = Ref{Float64}(real(s1_ref[])) + s1i = Ref{Float64}(imag(s1_ref[])) + s2r = Ref{Float64}(real(s2_ref[])) + s2i = Ref{Float64}(imag(s2_ref[])) + nz = Ref{Int32}() + iuf_ref = Ref{Int32}(iuf) + + ccall((:zs1s2_, libopenspecfun), Cvoid, + (Ref{Float64}, Ref{Float64}, # (ZRR,ZRI) + Ref{Float64}, Ref{Float64}, # (S1R,S1I) + Ref{Float64}, Ref{Float64}, # (S2R,S2I) + # NZ, ASCLE, ALIM, IUF + Ref{Int32}, Ref{Float64}, Ref{Float64}, Ref{Int32} + ), + real(zr), imag(zr), + s1r, s1i, + s2r, s2i, + nz, ascle, alim, iuf_ref) + + s1_ref[] = complex(s1r[], s1i[]) + s2_ref[] = complex(s2r[], s2i[]) + nz[], Int(iuf_ref[]) +end diff --git a/test/amos/helper.jl b/test/amos/helper.jl index 48c93335..c110f9d9 100644 --- a/test/amos/helper.jl +++ b/test/amos/helper.jl @@ -228,3 +228,66 @@ end tkscl!(y, complex(zr), 2.0, length(y)) end end + +"test if one of input is NaN" +contains_nan(ref, impl) = isnan(ref) || isnan(impl) +contains_nan(ref::Ref{T}, impl::Ref{T}) where {T} = + contains_nan(ref[], impl[]) + +naninf(f) = isnan(f) || isinf(f) + +@testset "AMOS.s1s2!" begin + function test_s1s2( + zr::ComplexF64, + s1::ComplexF64, + s2::ComplexF64, + ascle::Float64=1e-30, + alim::Float64=1e-6, + iuf::Int=0 + ) + s1_ref, s1_impl = Ref(s1), Ref(s1) + s2_ref, s2_impl = Ref(s2), Ref(s2) + + nz_ref, iuf_ref = AMOS._s1s2!(zr, s1_ref, s2_ref, ascle, alim, iuf) + nz_impl, iuf_impl = AMOS.s1s2!(zr, s1_impl, s2_impl, ascle, alim, iuf) + + @test nz_ref == nz_impl + @test iuf_ref == iuf_impl + if contains_nan(s1_ref, s1_impl) + @test s1_ref[] === s1_impl[] + else + @test s1_ref[] ≈ s1_impl[] + end + if contains_nan(s2_ref, s2_impl) + @test s2_ref[] === s2_impl[] + else + @test s2_ref[] ≈ s2_impl[] + end + end + + test_zr = [ + SPECIAL_FLOAT32..., + ] + s1s2 = [ + SPECIAL_FLOAT32..., + ] |> complex + lim_scale = [ + # 1e-0 ~ 1e-8 + [ 10.0^i for i in 0:-1:-8 ]..., + ] + iuf_test = [ -1, 0, 1, 2 ] + + for zr in test_zr, + s1 in s1s2, + s2 in reverse(s1s2), + ascle in lim_scale, + alim in lim_scale, + iuf in iuf_test + # TODO + naninf(zr) && continue + naninf(s1) && continue + naninf(s2) && continue + + test_s1s2(complex(zr), s1, s2, ascle, alim, iuf) + end +end From 8a4520a4c1ce9699ec9597dadc82b99626e75174 Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Sun, 21 Jan 2024 15:34:47 -0600 Subject: [PATCH 20/34] amos: move uchk to file --- src/amos/AMOS.jl | 15 +++++++++++++++ src/amos/helper.jl | 33 --------------------------------- src/amos/subroutines/uchk.jl | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 33 deletions(-) create mode 100644 src/amos/subroutines/uchk.jl diff --git a/src/amos/AMOS.jl b/src/amos/AMOS.jl index b49f5f88..de829e89 100644 --- a/src/amos/AMOS.jl +++ b/src/amos/AMOS.jl @@ -3,4 +3,19 @@ include("const.jl") include("helper.jl") include("warp.jl") + +# internal subroutines +_subroutine_names = [ +# No deps, leaf Functions + "uchk" +# Only deps on leaf functions + +# Othsers +] # _subroutine_names + +for fname in _subroutine_names + + include(joinpath("subroutines", "$(fname).jl")) +end + end # AMOS diff --git a/src/amos/helper.jl b/src/amos/helper.jl index b704a5d9..67520e5b 100644 --- a/src/amos/helper.jl +++ b/src/amos/helper.jl @@ -1,38 +1,5 @@ # SPDX-License-Identifier: BSD-3-Clause OR MIT -""" - uchk(y::ComplexF64, ascle::Float64, tol::Float64) - -`y` enters as a scaled quantity whose magnitude is greater than -`Exp(-alim) = ascle = 1.0e+3*D1_MACH(1)/tol`. -The test is made to see if the magnitude of the real or imaginary part would -Underflow when `y` is scaled (by `tol`) to its proper value. -`y` is accepted if the underflow is at least one precision below the magnitude of the largest component; -Otherwise the phase angle does not have absolute accuracy and an underflow is assumed. - -# Impl Ref -- [`openspecfun/amos/zuchk.f`](https://github.com/JuliaMath/openspecfun/blob/v0.5.6/amos/zuchk.f) -- [`scipy/scipy/special/_amos.c:amos_uchk`](https://github.com/scipy/scipy/blob/b882f1b7ebe55e534f29a8d68a54e4ecd30aeb1a/scipy/special/_amos.c#L4405-L4439) -""" -function uchk(y::ComplexF64, ascle::Float64, tol::Float64) - yr = abs(real(y)) - yi = abs(imag(y)) - ss = max(yr, yi) - st = min(yr, yi) - - if st > ascle - # TODO: ret ::Bool - return 0 - else - st /= tol - if ss < st - return 1 - else - return 0 - end - end -end - """ gammaln(z::Float64) diff --git a/src/amos/subroutines/uchk.jl b/src/amos/subroutines/uchk.jl new file mode 100644 index 00000000..e56ecb10 --- /dev/null +++ b/src/amos/subroutines/uchk.jl @@ -0,0 +1,34 @@ +# SPDX-License-Identifier: BSD-3-Clause OR MIT + +""" + uchk(y::ComplexF64, ascle::Float64, tol::Float64) + +`y` enters as a scaled quantity whose magnitude is greater than +`Exp(-alim) = ascle = 1.0e+3*D1_MACH(1)/tol`. +The test is made to see if the magnitude of the real or imaginary part would +Underflow when `y` is scaled (by `tol`) to its proper value. +`y` is accepted if the underflow is at least one precision below the magnitude of the largest component; +Otherwise the phase angle does not have absolute accuracy and an underflow is assumed. + +# Impl Ref +- [`openspecfun/amos/zuchk.f`](https://github.com/JuliaMath/openspecfun/blob/v0.5.6/amos/zuchk.f) +- [`scipy/scipy/special/_amos.c:amos_uchk`](https://github.com/scipy/scipy/blob/b882f1b7ebe55e534f29a8d68a54e4ecd30aeb1a/scipy/special/_amos.c#L4405-L4439) +""" +function uchk(y::ComplexF64, ascle::Float64, tol::Float64) + yr = abs(real(y)) + yi = abs(imag(y)) + ss = max(yr, yi) + st = min(yr, yi) + + if st > ascle + # TODO: ret ::Bool + return 0 + else + st /= tol + if ss < st + return 1 + else + return 0 + end + end +end From c8f267021a333f11d8029f0bb0b7cfc540103959 Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Sun, 21 Jan 2024 15:37:16 -0600 Subject: [PATCH 21/34] amos: move gammaln to file --- src/amos/AMOS.jl | 3 +- src/amos/helper.jl | 90 --------------------------------- src/amos/subroutines/gammaln.jl | 90 +++++++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+), 91 deletions(-) create mode 100644 src/amos/subroutines/gammaln.jl diff --git a/src/amos/AMOS.jl b/src/amos/AMOS.jl index de829e89..cf9d2a74 100644 --- a/src/amos/AMOS.jl +++ b/src/amos/AMOS.jl @@ -7,7 +7,8 @@ include("warp.jl") # internal subroutines _subroutine_names = [ # No deps, leaf Functions - "uchk" + "uchk", + "gammaln", # Only deps on leaf functions # Othsers diff --git a/src/amos/helper.jl b/src/amos/helper.jl index 67520e5b..e0d5da85 100644 --- a/src/amos/helper.jl +++ b/src/amos/helper.jl @@ -1,95 +1,5 @@ # SPDX-License-Identifier: BSD-3-Clause OR MIT -""" - gammaln(z::Float64) - -Compute the logarithm of the gamma function. - -# fortran comments -DGAMLN COMPUTES THE NATURAL LOG OF THE GAMMA FUNCTION FOR -Z.GT.0. THE ASYMPTOTIC EXPANSION IS USED TO GENERATE VALUES -GREATER THAN ZMIN WHICH ARE ADJUSTED BY THE RECURSION -G(Z+1)=Z*G(Z) FOR Z.LE.ZMIN. THE FUNCTION WAS MADE AS -PORTABLE AS POSSIBLE BY COMPUTIMG ZMIN FROM THE NUMBER OF BASE -10 DIGITS IN A WORD, RLN=AMAX1(-ALOG10(R1MACH(4)),0.5E-18) -LIMITED TO 18 DIGITS OF (RELATIVE) ACCURACY. - -SINCE INTEGER ARGUMENTS ARE COMMON, A TABLE LOOK UP ON 100 -VALUES IS USED FOR SPEED OF EXECUTION. - -# Impl Ref -- [`openspecfun/amos/dgamln.f`](https://github.com/JuliaMath/openspecfun/blob/v0.5.6/amos/dgamln.f) -- [`scipy/scipy/special/_amos.c:amos_gamln`](https://github.com/scipy/scipy/blob/b882f1b7ebe55e534f29a8d68a54e4ecd30aeb1a/scipy/special/_amos.c#L3678C8-L3775) -""" -function gammaln(z::Float64) - con::Float64 = 1.83787706640934548 # ln(2*pi) - @assert con == log(big"2" * pi) |> Float64 - - # TODO: handle z <= 0 || isnan(z) first - if z > 0.0 - if z <= 101.0 - nz = Int(trunc(z)) - fz::Float64 = z - nz - if fz <= 0.0 - if nz <= 100 - return GAMMALN_GLN[nz] - end - end - end #= 10 =# - - wdtol = max(D1_MACH[3+1], 1e-18) - i1m = I1_MACH[13+1] - rln = D1_MACH[4+1] * i1m - fln = max(min(rln, 20.0), 3.0) - 3.0 - zm = 1.8 + 0.3875 * fln - mz = trunc(zm) + 1 - zmin = mz - zdmy = z - zinc = 0.0 - if z < zmin - zinc = zmin - nz - zdmy = z + zinc - end #= 20 =# - - zp = 1.0 / zdmy - t1 = GAMMALN_CF[0+1] * zp - s = t1 - if zp >= wdtol - zsq = zp * zp - tst = t1 * wdtol - # NOTE: `i=1` is outside `if` - for i = 2:22 - zp *= zsq - trm = GAMMALN_CF[i] * zp - if abs(trm) < tst - break # Goto 40 - end - - s += trm - end #= 30 =# - end #= 40 =# - - if 0.0 == zinc - tlg = log(z) - return z * (tlg - 1.0) + 0.5 * (con - tlg) + s - end #= 50 =# - - zp = 1.0 - nz = trunc(zinc) - for i = 1:nz - # NOTE: Parentheses are required to avoid overflow - zp *= z + (i - 1) - end #= 60 =# - - tlg = log(zdmy) - return zdmy * (tlg - 1.0) - log(zp) + 0.5 * (con - tlg) + s - end #= 70 =# - - # Zero or negative argument - return NaN -end - - """ kscl!( y::Vector{ComplexF64}, diff --git a/src/amos/subroutines/gammaln.jl b/src/amos/subroutines/gammaln.jl new file mode 100644 index 00000000..8b1c6bed --- /dev/null +++ b/src/amos/subroutines/gammaln.jl @@ -0,0 +1,90 @@ +# SPDX-License-Identifier: BSD-3-Clause OR MIT + +""" + gammaln(z::Float64) + +Compute the logarithm of the gamma function. + +# fortran comments +DGAMLN COMPUTES THE NATURAL LOG OF THE GAMMA FUNCTION FOR +Z.GT.0. THE ASYMPTOTIC EXPANSION IS USED TO GENERATE VALUES +GREATER THAN ZMIN WHICH ARE ADJUSTED BY THE RECURSION +G(Z+1)=Z*G(Z) FOR Z.LE.ZMIN. THE FUNCTION WAS MADE AS +PORTABLE AS POSSIBLE BY COMPUTIMG ZMIN FROM THE NUMBER OF BASE +10 DIGITS IN A WORD, RLN=AMAX1(-ALOG10(R1MACH(4)),0.5E-18) +LIMITED TO 18 DIGITS OF (RELATIVE) ACCURACY. + +SINCE INTEGER ARGUMENTS ARE COMMON, A TABLE LOOK UP ON 100 +VALUES IS USED FOR SPEED OF EXECUTION. + +# Impl Ref +- [`openspecfun/amos/dgamln.f`](https://github.com/JuliaMath/openspecfun/blob/v0.5.6/amos/dgamln.f) +- [`scipy/scipy/special/_amos.c:amos_gamln`](https://github.com/scipy/scipy/blob/b882f1b7ebe55e534f29a8d68a54e4ecd30aeb1a/scipy/special/_amos.c#L3678C8-L3775) +""" +function gammaln(z::Float64) + con::Float64 = 1.83787706640934548 # ln(2*pi) + @assert con == log(big"2" * pi) |> Float64 + + # TODO: handle z <= 0 || isnan(z) first + if z > 0.0 + if z <= 101.0 + nz = Int(trunc(z)) + fz::Float64 = z - nz + if fz <= 0.0 + if nz <= 100 + return GAMMALN_GLN[nz] + end + end + end #= 10 =# + + wdtol = max(D1_MACH[3+1], 1e-18) + i1m = I1_MACH[13+1] + rln = D1_MACH[4+1] * i1m + fln = max(min(rln, 20.0), 3.0) - 3.0 + zm = 1.8 + 0.3875 * fln + mz = trunc(zm) + 1 + zmin = mz + zdmy = z + zinc = 0.0 + if z < zmin + zinc = zmin - nz + zdmy = z + zinc + end #= 20 =# + + zp = 1.0 / zdmy + t1 = GAMMALN_CF[0+1] * zp + s = t1 + if zp >= wdtol + zsq = zp * zp + tst = t1 * wdtol + # NOTE: `i=1` is outside `if` + for i = 2:22 + zp *= zsq + trm = GAMMALN_CF[i] * zp + if abs(trm) < tst + break # Goto 40 + end + + s += trm + end #= 30 =# + end #= 40 =# + + if 0.0 == zinc + tlg = log(z) + return z * (tlg - 1.0) + 0.5 * (con - tlg) + s + end #= 50 =# + + zp = 1.0 + nz = trunc(zinc) + for i = 1:nz + # NOTE: Parentheses are required to avoid overflow + zp *= z + (i - 1) + end #= 60 =# + + tlg = log(zdmy) + return zdmy * (tlg - 1.0) - log(zp) + 0.5 * (con - tlg) + s + end #= 70 =# + + # Zero or negative argument + return NaN +end From 8f31410a39fd96aef5eff9552e7c5c76688653ac Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Sun, 21 Jan 2024 15:39:00 -0600 Subject: [PATCH 22/34] amos: move s1s2 to file --- src/amos/AMOS.jl | 2 +- src/amos/helper.jl | 68 ----------------------------------- src/amos/subroutines/s1s2.jl | 69 ++++++++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+), 69 deletions(-) create mode 100644 src/amos/subroutines/s1s2.jl diff --git a/src/amos/AMOS.jl b/src/amos/AMOS.jl index cf9d2a74..553f3741 100644 --- a/src/amos/AMOS.jl +++ b/src/amos/AMOS.jl @@ -9,13 +9,13 @@ _subroutine_names = [ # No deps, leaf Functions "uchk", "gammaln", + "s1s2", # Only deps on leaf functions # Othsers ] # _subroutine_names for fname in _subroutine_names - include(joinpath("subroutines", "$(fname).jl")) end diff --git a/src/amos/helper.jl b/src/amos/helper.jl index e0d5da85..c17ab464 100644 --- a/src/amos/helper.jl +++ b/src/amos/helper.jl @@ -146,71 +146,3 @@ function kscl!( # @info "ret end kscl!: " n nz return nz end - -""" - s1s2!( - zr::ComplexF64, - s1_ref::Ref{ComplexF64}, - s2_ref::Ref{ComplexF64}, - ascle::Float64, - alim::Float64, - iuf::Int - ) - -# fortran comments -Zs1s2 tests for a possible underflow resulting from the -addition of the I and k functions in the analytic con- -tinuation formula where s1=k function and s2=i function. -On kode=1 the I and k functions are different orders of -magnitude, but for kode=2 they can be of the same order -of magnitude and the maximum must be at least one -precision above the underflow limit. - -# Impl Ref -- [`openspecfun/amos/zs1s2.f`](https://github.com/JuliaMath/openspecfun/blob/v0.5.6/amos/zs1s2.f) -- [`scipy/scipy/special/_amos.c:amos_s1s2`](https://github.com/scipy/scipy/blob/b882f1b7ebe55e534f29a8d68a54e4ecd30aeb1a/scipy/special/_amos.c#L4348-L4402) -""" -function s1s2!( - zr::ComplexF64, - s1_ref::Ref{ComplexF64}, - s2_ref::Ref{ComplexF64}, - ascle::Float64, - alim::Float64, - iuf::Int -) - s1 = s1_ref[] - s2 = s2_ref[] - - nz::Int = 0 - as1 = abs(s1) - as2 = abs(s2) - - if (real(s1) != 0.0 || imag(s1) != 0.0) - if as1 != 0.0 - xx = real(zr) - aln = -xx - xx + log(as1) - s1d = s1 - s1_ref[] = 0.0 - as1 = 0.0 - if aln >= -alim - c1 = log(s1d) - c1 -= zr - c1 -= zr - s1_ref[] = exp(c1) - as1 = abs(s1_ref[]) - iuf += 1 - end - end - end #= 10 =# - - aa = max(as1, as2) - if aa > ascle - return nz, iuf - end - - s1_ref[] = 0.0 - s2_ref[] = 0.0 - nz = 1 - iuf = 0 - return nz, iuf -end diff --git a/src/amos/subroutines/s1s2.jl b/src/amos/subroutines/s1s2.jl new file mode 100644 index 00000000..303b9539 --- /dev/null +++ b/src/amos/subroutines/s1s2.jl @@ -0,0 +1,69 @@ +# SPDX-License-Identifier: BSD-3-Clause OR MIT + +""" + s1s2!( + zr::ComplexF64, + s1_ref::Ref{ComplexF64}, + s2_ref::Ref{ComplexF64}, + ascle::Float64, + alim::Float64, + iuf::Int + ) + +# fortran comments +Zs1s2 tests for a possible underflow resulting from the +addition of the I and k functions in the analytic con- +tinuation formula where s1=k function and s2=i function. +On kode=1 the I and k functions are different orders of +magnitude, but for kode=2 they can be of the same order +of magnitude and the maximum must be at least one +precision above the underflow limit. + +# Impl Ref +- [`openspecfun/amos/zs1s2.f`](https://github.com/JuliaMath/openspecfun/blob/v0.5.6/amos/zs1s2.f) +- [`scipy/scipy/special/_amos.c:amos_s1s2`](https://github.com/scipy/scipy/blob/b882f1b7ebe55e534f29a8d68a54e4ecd30aeb1a/scipy/special/_amos.c#L4348-L4402) +""" +function s1s2!( + zr::ComplexF64, + s1_ref::Ref{ComplexF64}, + s2_ref::Ref{ComplexF64}, + ascle::Float64, + alim::Float64, + iuf::Int +) + s1 = s1_ref[] + s2 = s2_ref[] + + nz::Int = 0 + as1 = abs(s1) + as2 = abs(s2) + + if (real(s1) != 0.0 || imag(s1) != 0.0) + if as1 != 0.0 + xx = real(zr) + aln = -xx - xx + log(as1) + s1d = s1 + s1_ref[] = 0.0 + as1 = 0.0 + if aln >= -alim + c1 = log(s1d) + c1 -= zr + c1 -= zr + s1_ref[] = exp(c1) + as1 = abs(s1_ref[]) + iuf += 1 + end + end + end #= 10 =# + + aa = max(as1, as2) + if aa > ascle + return nz, iuf + end + + s1_ref[] = 0.0 + s2_ref[] = 0.0 + nz = 1 + iuf = 0 + return nz, iuf +end From 79e21106dec352a6333634bde82e04997f97799b Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Sun, 21 Jan 2024 15:42:06 -0600 Subject: [PATCH 23/34] amos: move kscl to file --- src/amos/AMOS.jl | 4 +--- src/amos/{helper.jl => subroutines/kscl.jl} | 0 2 files changed, 1 insertion(+), 3 deletions(-) rename src/amos/{helper.jl => subroutines/kscl.jl} (100%) diff --git a/src/amos/AMOS.jl b/src/amos/AMOS.jl index 553f3741..585262c5 100644 --- a/src/amos/AMOS.jl +++ b/src/amos/AMOS.jl @@ -1,9 +1,7 @@ module AMOS include("const.jl") -include("helper.jl") include("warp.jl") - # internal subroutines _subroutine_names = [ # No deps, leaf Functions @@ -11,7 +9,7 @@ _subroutine_names = [ "gammaln", "s1s2", # Only deps on leaf functions - + "kscl", # Othsers ] # _subroutine_names diff --git a/src/amos/helper.jl b/src/amos/subroutines/kscl.jl similarity index 100% rename from src/amos/helper.jl rename to src/amos/subroutines/kscl.jl From 7d6bc4237ea40d5f1d074b29ad9dd4755fdf21c1 Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Sun, 21 Jan 2024 16:50:25 -0600 Subject: [PATCH 24/34] amos: add const INV_2PI --- src/amos/const.jl | 4 ++++ test/amos/const.jl | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/src/amos/const.jl b/src/amos/const.jl index 64082928..43e8c134 100644 --- a/src/amos/const.jl +++ b/src/amos/const.jl @@ -1,5 +1,9 @@ # SPDX-License-Identifier: BSD-3-Clause OR MIT +"`1 / (2*pi)`" +const INV_2PI = 1.0 / (2*pi) + + # TODO: Use NamedTuple? """ I1_MACH :: Int32 diff --git a/test/amos/const.jl b/test/amos/const.jl index bf9c1eef..af32f8f2 100644 --- a/test/amos/const.jl +++ b/test/amos/const.jl @@ -1,4 +1,10 @@ +@testset "AMOS.const" begin + # defined in `amos_asyi`: `pi, rpi` + @test Float64(pi) == 3.14159265358979324 + @test AMOS.INV_2PI == 0.159154943091895336 +end + @testset "AMOS.I1_MACH,D1_MACH" begin # I1_MACH @test size(AMOS.I1_MACH) == (16,) From b89d6f25e4fd6873c944ddbfe37aac16a9bb8214 Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Sun, 21 Jan 2024 20:50:52 -0600 Subject: [PATCH 25/34] amos: impl `AMOS.asyi!` --- src/amos/AMOS.jl | 1 + src/amos/subroutines/asyi.jl | 160 +++++++++++++++++++++++++++++++++++ src/amos/warp.jl | 48 +++++++++++ test/amos/helper.jl | 64 ++++++++++++++ 4 files changed, 273 insertions(+) create mode 100644 src/amos/subroutines/asyi.jl diff --git a/src/amos/AMOS.jl b/src/amos/AMOS.jl index 585262c5..239c88e2 100644 --- a/src/amos/AMOS.jl +++ b/src/amos/AMOS.jl @@ -8,6 +8,7 @@ _subroutine_names = [ "uchk", "gammaln", "s1s2", + "asyi", # Only deps on leaf functions "kscl", # Othsers diff --git a/src/amos/subroutines/asyi.jl b/src/amos/subroutines/asyi.jl new file mode 100644 index 00000000..45cc4f48 --- /dev/null +++ b/src/amos/subroutines/asyi.jl @@ -0,0 +1,160 @@ +# SPDX-License-Identifier: BSD-3-Clause OR MIT + +""" + +# fortran comments +zasyi computes the i bessel function for real(z).ge.0.0 by +means of the asymptotic expansion for large cabs(z) in the +region cabs(z).gt.max(rl,fnu*fnu/2). nz=0 is a normal return. +nz.lt.0 indicates an overflow on kode=1. + +# Impl Ref +- [`openspecfun/amos/zasyi.f`](https://github.com/JuliaMath/openspecfun/blob/v0.5.6/amos/zasyi.f) +- [`scipy/scipy/special/_amos.c:amos_asyi`](https://github.com/scipy/scipy/blob/b882f1b7ebe55e534f29a8d68a54e4ecd30aeb1a/scipy/special/_amos.c#L997-L1120) +""" +function asyi!( + y::Vector{ComplexF64}, + z::ComplexF64, + fnu::Float64, + kode::Int, + n::Int, + rl::Float64, + tol::Float64, + elim::Float64, + alim::Float64 +) + "1 / (2*pi)" + rpi = INV_2PI + + az = abs(z) + arm = 1.0e3 * D1_MACH[1] + rtr1 = sqrt(arm) + il = min(2, n) + dfnu = fnu + (n - il) + + # OVERFLOW TEST + ak1 = sqrt(rpi / z) + cz = z + if kode == 2 + cz = complex(0.0, imag(z)) + end #= 10 =# + + czr = real(cz) + if abs(czr) <= elim + dnu2 = dfnu + dfnu + koded = 1 + if !(abs(czr) > alim && n > 2) + koded = 0 + ak1 *= exp(cz) + end + #= 20 =# + fdn = 0.0 + if dnu2 > rtr1 + fdn = dnu2 * dnu2 + end + ez = z * 8.0 + + # WHEN Z IS IMAGINARY, THE ERROR TEST MUST BE MADE + # RELATIVE TO THE FIRST RECIPROCAL POWER SINCE THIS + # IS THE LEADING TERM OF THE EXPANSION FOR THE + # IMAGINARY PART. + aez = 8.0 * az + s = tol / aez + jl = Int(trunc(rl + rl)) + 2 + yy = imag(z) + p1 = 0.0 + 0.0im + if yy != 0.0 + inu = Int(trunc(fnu)) + arg = (fnu - inu) * pi + inu += n - il + ak = -sin(arg) + bk = cos(arg) + if yy < 0.0 + bk = -bk + end + p1 = complex(ak, bk) + if isodd(inu) + p1 = -p1 + end + end + #= 30 =# + for k in 1:il + sqk = fdn - 1.0 + atol = s * abs(sqk) + sgn = 1.0 + cs1 = 1.0 + 0.0im + cs2 = 1.0 + 0.0im + ck = 1.0 + 0.0im + ak = 0.0 + aa = 1.0 + bb = aez + dk = ez + + converged = false + for _ in 1:jl + ck *= sqk / dk + cs2 += ck + sgn = -sgn + cs1 += ck * sgn + dk += ez + aa *= abs(sqk) / bb + bb += aez + ak += 8.0 + sqk -= ak + if aa <= atol + converged = true + break + end + end + #= 40 =# + if !converged + #= 110 =# + return -2 + end + + #= 50 =# + s2 = cs1 + zr = real(z) + if (zr + zr) < elim + s2 += p1 * cs2 * exp(-z - z) + end + #= 60 =# + fdn += 8.0 * dfnu + 4.0 + p1 = -p1 + m = n - il + k + y[m] = s2 * ak1 + end + #= 70 =# + if n <= 2 + return 0 + end + + nn = n + k = nn - 2 + ak = k + rz = 2.0 / z + ib = 3 + @assert length(y) >= k + for _ in ib:nn + y[k] = (ak + fnu) * rz * y[k+1] + y[k+2] + ak -= 1.0 + k -= 1 + end + #= 80 =# + if koded == 0 + return 0 + end + + ck = exp(cz) + for i in 1:nn + # ck isa ComplexF64 + y[i] *= ck + end + + #= 90 =# + return 0 + end + + #= 100 =# + return -1 +end diff --git a/src/amos/warp.jl b/src/amos/warp.jl index f7681e69..fae2806c 100644 --- a/src/amos/warp.jl +++ b/src/amos/warp.jl @@ -117,3 +117,51 @@ function _s1s2!( s2_ref[] = complex(s2r[], s2i[]) nz[], Int(iuf_ref[]) end + +""" +Will modify `y[]`. + +Return `nz`. +""" +function _asyi!( + y::Vector{ComplexF64}, + z::ComplexF64, + fnu::Float64, + kode::Int, + n::Int, + rl::Float64, + tol::Float64, + elim::Float64, + alim::Float64 +) + y_len = length(y) + yr = real.(y) + yi = imag.(y) + nz = Ref{Int32}() + + ccall((:zasyi_, libopenspecfun), + Cvoid, + ( + # (ZR,ZI), + Ref{Float64}, Ref{Float64}, + # FNU, KODE, N, + Ref{Float64}, Ref{Int32}, Ref{Int32}, + # (YR,YI), + Ptr{Float64}, Ptr{Float64}, + # NZ, RL, TOL, ELIM, ALIM + Ref{Int32}, Ref{Float64}, Ref{Float64}, + Ref{Float64}, Ref{Float64} + ), + real(z), imag(z), + fnu, kode, n, + yr, yi, + nz, rl, tol, elim, alim + ) + + # Update `y` + y_new = complex.(yr, yi) + update_arr!(y, y_new) + @assert length(y) == y_len + + nz[] +end diff --git a/test/amos/helper.jl b/test/amos/helper.jl index c110f9d9..24d24e94 100644 --- a/test/amos/helper.jl +++ b/test/amos/helper.jl @@ -233,6 +233,8 @@ end contains_nan(ref, impl) = isnan(ref) || isnan(impl) contains_nan(ref::Ref{T}, impl::Ref{T}) where {T} = contains_nan(ref[], impl[]) +contains_nan(ref::Vector{T}, impl::Vector{T}) where {T} = + any(contains_nan.(ref, impl)) naninf(f) = isnan(f) || isinf(f) @@ -291,3 +293,65 @@ naninf(f) = isnan(f) || isinf(f) test_s1s2(complex(zr), s1, s2, ascle, alim, iuf) end end + +@testset "AMOS.asyi!" begin + function test_asyi( + y::Vector{ComplexF64}, + z::ComplexF64, + fnu::Float64, + kode::Int, + n::Int, + rl::Float64, + tol::Float64, + elim::Float64, + alim::Float64 + ) + y_ref, y_impl = copy(y), copy(y) + + nz_ref = AMOS._asyi!(y_ref, z, fnu, kode, n, rl, tol, elim, alim) + nz_impl = AMOS.asyi!(y_impl, z, fnu, kode, n, rl, tol, elim, alim) + + @test nz_ref == nz_impl + if contains_nan(y_ref, y_impl) + cmp = all(y_ref .=== y_impl) + @test cmp + if !cmp + @info "contains_nan" y_ref y_impl + @show y z fnu kode n rl tol elim alim + end + else + @test y_ref ≈ y_impl + end + end + + test_y = [ + ComplexF64[], # TODO: n==0 + # n=1 + ComplexF64[0.0], + ComplexF64[1.0], + ComplexF64[pi], + [ rand(ComplexF64, 1) for _ in 1:10 ]..., + # n=2 + ComplexF64[0.0, 0.0], + ComplexF64[1., 1.], + ComplexF64[1., 0.], + ComplexF64[0., 1.], + ComplexF64[1., 2.], + [ rand(ComplexF64, 2) for _ in 1:10 ]..., + # n=5 + ComplexF64[ complex(i,i) for i in 1:5 ], + ] + test_z = [ + SPECIAL_FLOAT32..., + ] |> complex + + for y in test_y, + z in test_z, + fnu in [ 0., 1. ], + kode in [ 1, 2 ] + naninf(z) && continue + z==0.0 && continue # div by zero + + test_asyi(y, z, fnu, kode, length(y), 3.1, 1e-6, eps(), 2*eps()) + end +end From 4feb64b13a588e5daf54e090f09330f5cdeab4d0 Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Sun, 21 Jan 2024 22:17:56 -0600 Subject: [PATCH 26/34] amos: impl `AMOS.mlri!` --- src/amos/AMOS.jl | 1 + src/amos/subroutines/mlri.jl | 184 +++++++++++++++++++++++++++++++++++ src/amos/warp.jl | 45 +++++++++ test/amos/helper.jl | 61 ++++++++++++ 4 files changed, 291 insertions(+) create mode 100644 src/amos/subroutines/mlri.jl diff --git a/src/amos/AMOS.jl b/src/amos/AMOS.jl index 239c88e2..7d04507f 100644 --- a/src/amos/AMOS.jl +++ b/src/amos/AMOS.jl @@ -11,6 +11,7 @@ _subroutine_names = [ "asyi", # Only deps on leaf functions "kscl", + "mlri", # Othsers ] # _subroutine_names diff --git a/src/amos/subroutines/mlri.jl b/src/amos/subroutines/mlri.jl new file mode 100644 index 00000000..723b37c0 --- /dev/null +++ b/src/amos/subroutines/mlri.jl @@ -0,0 +1,184 @@ +# SPDX-License-Identifier: BSD-3-Clause OR MIT + +""" + +# fortran comments +zmlri computes the i bessel function for re(z).ge.0.0 by the +miller algorithm normalized by a neumann series. + +# Impl Ref +- [`openspecfun/amos/zmlri.f`](https://github.com/JuliaMath/openspecfun/blob/v0.5.6/amos/zmlri.f) +- [`scipy/scipy/special/_amos.c:amos_mlri`](https://github.com/scipy/scipy/blob/b882f1b7ebe55e534f29a8d68a54e4ecd30aeb1a/scipy/special/_amos.c#L3778-L3946) +""" +function mlri!( + y::Vector{ComplexF64}, + z::ComplexF64, + fnu::Float64, + kode::Int, + n::Int, + tol::Float64, +) + @assert length(y) >= n + + scle::Float64 = D1_MACH[1] / tol + az = abs(z) + iaz = Int(trunc(az)) + ifnu = Int(trunc(fnu)) + inu::Int = ifnu + n - 1 + at = iaz + 1 + ck::ComplexF64 = at / z + rz = 2.0 / z + p1 = 0.0 + 0.0im + p2 = 1.0 + 0.0im + ack = (at + 1.0) / az + rho = ack + sqrt(ack*ack - 1.0) + rho2 = rho * rho + tst = (rho2 + rho2) / ((rho2 - 1.0) * (rho - 1.0)) + tst /= tol + + # COMPUTE RELATIVE TRUNCATION ERROR INDEX FOR SERIES + ak = at + i::Int = 1 + converged = false + for _i = 1:80 + # NOTE: will use i later + i = _i + pt = p2 + p2 = p1 - ck * p2 + p1 = pt + ck += rz + ap = abs(p2) + if ap > (tst*ak*ak) + converged = true + break + end + ak += 1.0 + end #= 10 =# + if !converged + return -2 + end + + i += 1 + k::Int = 0 + if inu >= iaz + # COMPUTE RELATIVE TRUNCATION ERROR FOR RATIOS + p1 = 0.0 + 0.0im + p2 = 1.0 + 0.0im + at = inu + 1 + ck = at / z + ack = at / az + tst = sqrt(ack / tol) + itime = 1 + k = 1 + converged = false + for _k = 1:80 + # NOTE: will use k later + k = _k + pt = p2 + p2 = p1 - ck * p2 + p1 = pt + ck += rz + ap = abs(p2) + if ap >= tst + if itime == 2 + converged = true + break + end + + ack = abs(ck) + flam = ack + sqrt(ack*ack - 1.0) + fkap = ap / abs(p1) + rho = min(flam, fkap) + tst *= sqrt(rho / (rho*rho - 1.0)) + itime = 2 + end + end #= 30 =# + if !converged + return -2 + end + end #= 40 =# + + # + # BACKWARD RECURRENCE AND SUM NORMALIZING RELATION + # + k += 1 + kk = max(i + iaz, k + inu) + fkk = kk + p1 = 0.0 + 0.0im + # + # SCALE P2 AND SUM BY SCLE + # + p2 = complex(scle) + fnf = fnu - ifnu + tfnf = fnf + fnf + bk = gammaln(fkk + tfnf + 1.0) - gammaln(fkk + 1.0) - gammaln(tfnf + 1.0) + bk = exp(bk) + sum = 0.0 + 0.0im + km = kk - inu + for i = 1:km + pt = p2 + p2 = p1 + (fkk + fnf) * rz * p2 + p1 = pt + ak = 1.0 - tfnf / (fkk + tfnf) + ack = bk * ak + sum += (ack + bk) * p1 + bk = ack + fkk -= 1.0 + end #= 50 =# + + y[n] = p2 + if n != 1 + for i = 2:n + pt = p2 + p2 = p1 + (fkk + fnf) * rz * p2 + p1 = pt + ak = 1.0 - tfnf / (fkk + tfnf) + ack = bk * ak + sum += (ack + bk) * p1 + bk = ack + fkk -= 1.0 + + m = n - i + 1 + y[m] = p2 + end + end #= 60 =# + + #= 70 =# + if ifnu > 0 + for i = 1:ifnu + pt = p2 + p2 = p1 + (fkk + fnf) * rz * p2 + p1 = pt + ak = 1.0 - tfnf / (fkk + tfnf) + ack = bk * ak + sum += (ack + bk) * p1 + bk = ack + fkk -= 1.0 + end + end #= 80 =# + + #= 90 =# + pt::ComplexF64 = z + if kode == 2 + # pt.real = 0.0 + pt -= real(z) + end + p1 = -fnf * log(rz) + pt + ap = gammaln(1.0 + fnf) + pt = p1 - ap + # + # THE DIVISION EXP(PT)/(SUM+P2) IS ALTERED TO AVOID OVERFLOW + # IN THE DENOMINATOR BY SQUARING LARGE QUANTITIES + # + p2 += sum + ap = abs(p2) + p1 = 1.0 / ap + ck = exp(pt) * p1 + pt = conj(p2) * p1 + cnorm = ck * pt + for i = 1:n + y[i] *= cnorm + end + + return 0 +end diff --git a/src/amos/warp.jl b/src/amos/warp.jl index fae2806c..52dd661b 100644 --- a/src/amos/warp.jl +++ b/src/amos/warp.jl @@ -165,3 +165,48 @@ function _asyi!( nz[] end + + +""" +Will modify `y[]`. + +Return `nz`. +""" +function _mlri!( + y::Vector{ComplexF64}, + z::ComplexF64, + fnu::Float64, + kode::Int, + n::Int, + tol::Float64, +) + y_len = length(y) + yr = real.(y) + yi = imag.(y) + nz = Ref{Int32}() + + ccall((:zmlri_, libopenspecfun), + Cvoid, + ( + # (ZR,ZI), + Ref{Float64}, Ref{Float64}, + # FNU, KODE, N, + Ref{Float64}, Ref{Int32}, Ref{Int32}, + # (YR,YI), + Ptr{Float64}, Ptr{Float64}, + # NZ, TOL + Ref{Int32}, Ref{Float64}, + ), + real(z), imag(z), + fnu, kode, n, + yr, yi, + nz, tol, + ) + + # Update `y` + y_new = complex.(yr, yi) + update_arr!(y, y_new) + @assert length(y) == y_len + + nz[] +end diff --git a/test/amos/helper.jl b/test/amos/helper.jl index 24d24e94..666cf988 100644 --- a/test/amos/helper.jl +++ b/test/amos/helper.jl @@ -355,3 +355,64 @@ end test_asyi(y, z, fnu, kode, length(y), 3.1, 1e-6, eps(), 2*eps()) end end + + +@testset "AMOS.mlri!" begin + function test_mlri( + y::Vector{ComplexF64}, + z::ComplexF64, + fnu::Float64, + kode::Int, + n::Int, + tol::Float64 + ) + y_ref, y_impl = copy(y), copy(y) + + nz_ref = AMOS._mlri!(y_ref, z, fnu, kode, n, tol) + nz_impl = AMOS.mlri!(y_impl, z, fnu, kode, n, tol) + + @test nz_ref == nz_impl + if contains_nan(y_ref, y_impl) + cmp = all(y_ref .=== y_impl) + @test cmp + if !cmp + @info "contains_nan" y_ref y_impl + @show y z fnu kode n tol + end + else + @test y_ref ≈ y_impl + end + end + + test_y = [ + # ComplexF64[], # TODO: n==0 + # n=1 + ComplexF64[0.0], + ComplexF64[1.0], + ComplexF64[pi], + [ rand(ComplexF64, 1) for _ in 1:10 ]..., + # n=2 + ComplexF64[0.0, 0.0], + ComplexF64[1., 1.], + ComplexF64[1., 0.], + ComplexF64[0., 1.], + ComplexF64[1., 2.], + [ rand(ComplexF64, 2) for _ in 1:10 ]..., + # n=5 + ComplexF64[ complex(i,i) for i in 1:5 ], + ] + test_z = [ + SPECIAL_FLOAT32..., + ] |> complex + + for y in test_y, + z in test_z, + fnu in [ 0., 1. ], + kode in [ 1, 2 ] + naninf(z) && continue + z==0.0 && continue # div by zero + abs(z)>typemax(Int) && continue # InexactError: Int64 + + test_mlri(y, z, fnu, kode, length(y), eps()) + end +end From b7745548da633be74a6befa33ce22f04d2940b10 Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Sun, 21 Jan 2024 22:34:45 -0600 Subject: [PATCH 27/34] amos: init `AMOS.seri!` impl --- src/amos/AMOS.jl | 1 + src/amos/subroutines/seri.jl | 29 +++++++++++++++++ src/amos/warp.jl | 47 +++++++++++++++++++++++++++ test/amos/helper.jl | 61 ++++++++++++++++++++++++++++++++++++ 4 files changed, 138 insertions(+) create mode 100644 src/amos/subroutines/seri.jl diff --git a/src/amos/AMOS.jl b/src/amos/AMOS.jl index 7d04507f..e909dfae 100644 --- a/src/amos/AMOS.jl +++ b/src/amos/AMOS.jl @@ -12,6 +12,7 @@ _subroutine_names = [ # Only deps on leaf functions "kscl", "mlri", + "seri", # Othsers ] # _subroutine_names diff --git a/src/amos/subroutines/seri.jl b/src/amos/subroutines/seri.jl new file mode 100644 index 00000000..371b68eb --- /dev/null +++ b/src/amos/subroutines/seri.jl @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: BSD-3-Clause OR MIT + +""" + +# fortran comments +zseri computes the i bessel function for real(z).ge.0.0 by +means of the power series for large cabs(z) in the +region cabs(z).le.2*sqrt(fnu+1). nz=0 is a normal return. +nz.gt.0 means that the last nz components were set to zero +due to underflow. nz.lt.0 means underflow occurred, but the +condition cabs(z).le.2*sqrt(fnu+1) was violated and the +computation must be completed in another routine with n=n-abs(nz). + +# Impl Ref +- [`openspecfun/amos/zseri.f`](https://github.com/JuliaMath/openspecfun/blob/v0.5.6/amos/zseri.f) +- [`scipy/scipy/special/_amos.c:amos_seri`](https://github.com/scipy/scipy/blob/b882f1b7ebe55e534f29a8d68a54e4ecd30aeb1a/scipy/special/_amos.c#L4178-L4345) +""" +function seri!( + y::Vector{ComplexF64}, + z::ComplexF64, + fnu::Float64, + kode::Int, + n::Int, + tol::Float64, + elim::Float64, + alim::Float64 +) + +end diff --git a/src/amos/warp.jl b/src/amos/warp.jl index 52dd661b..359a83fe 100644 --- a/src/amos/warp.jl +++ b/src/amos/warp.jl @@ -210,3 +210,50 @@ function _mlri!( nz[] end + + +""" +Will modify `y[]`. + +Return `nz`. +""" +function _seri!( + y::Vector{ComplexF64}, + z::ComplexF64, + fnu::Float64, + kode::Int, + n::Int, + tol::Float64, + elim::Float64, + alim::Float64 +) + y_len = length(y) + yr = real.(y) + yi = imag.(y) + nz = Ref{Int32}() + + ccall((:zseri_, libopenspecfun), + Cvoid, + ( + # (ZR,ZI), + Ref{Float64}, Ref{Float64}, + # FNU, KODE, N, + Ref{Float64}, Ref{Int32}, Ref{Int32}, + # (YR,YI), + Ptr{Float64}, Ptr{Float64}, + # NZ, TOL, ELIM, ALIM + Ref{Int32}, Ref{Float64}, Ref{Float64}, Ref{Float64}, + ), + real(z), imag(z), + fnu, kode, n, + yr, yi, + nz, tol, elim, alim + ) + + # Update `y` + y_new = complex.(yr, yi) + update_arr!(y, y_new) + @assert length(y) == y_len + + nz[] +end diff --git a/test/amos/helper.jl b/test/amos/helper.jl index 666cf988..8ada6989 100644 --- a/test/amos/helper.jl +++ b/test/amos/helper.jl @@ -416,3 +416,64 @@ end test_mlri(y, z, fnu, kode, length(y), eps()) end end + +@testset "AMOS.seri!" begin + function test_seri( + y::Vector{ComplexF64}, + z::ComplexF64, + fnu::Float64, + kode::Int, + n::Int, + tol::Float64, + elim::Float64, + alim::Float64 + ) + y_ref, y_impl = copy(y), copy(y) + + nz_ref = AMOS._seri!(y_ref, z, fnu, kode, n, tol, elim, alim) + # nz_impl = AMOS.seri!(y_impl, z, fnu, kode, n, tol, elim, alim) + + # @test nz_ref == nz_impl + # if contains_nan(y_ref, y_impl) + # cmp = all(y_ref .=== y_impl) + # @test cmp + # if !cmp + # @info "contains_nan" y_ref y_impl + # @show y z fnu kode n tol elim alim + # end + # else + # @test y_ref ≈ y_impl + # end + end + + test_y = [ + # n=1 + ComplexF64[0.0], + ComplexF64[1.0], + ComplexF64[pi], + [ rand(ComplexF64, 1) for _ in 1:10 ]..., + # n=2 + ComplexF64[0.0, 0.0], + ComplexF64[1., 1.], + ComplexF64[1., 0.], + ComplexF64[0., 1.], + ComplexF64[1., 2.], + [ rand(ComplexF64, 2) for _ in 1:10 ]..., + # n=5 + ComplexF64[ complex(i,i) for i in 1:5 ], + ] + test_z = [ + SPECIAL_FLOAT32..., + ] |> complex + + for y in test_y, + z in test_z, + fnu in [ 0., 1. ], + kode in [ 1, 2 ] + naninf(z) && continue + z==0.0 && continue # div by zero + abs(z)>typemax(Int) && continue # InexactError: Int64 + + test_seri(y, z, fnu, kode, length(y), 1e-6, eps(), 2*eps()) + end +end From c710a8c7c8ee5da594122d5a60f4078257091964 Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Sun, 21 Jan 2024 23:27:53 -0600 Subject: [PATCH 28/34] amos: impl `AMOS.seri!` --- src/amos/subroutines/seri.jl | 210 +++++++++++++++++++++++++++++++++++ test/amos/helper.jl | 24 ++-- 2 files changed, 222 insertions(+), 12 deletions(-) diff --git a/src/amos/subroutines/seri.jl b/src/amos/subroutines/seri.jl index 371b68eb..ab75fa79 100644 --- a/src/amos/subroutines/seri.jl +++ b/src/amos/subroutines/seri.jl @@ -14,6 +14,8 @@ computation must be completed in another routine with n=n-abs(nz). # Impl Ref - [`openspecfun/amos/zseri.f`](https://github.com/JuliaMath/openspecfun/blob/v0.5.6/amos/zseri.f) - [`scipy/scipy/special/_amos.c:amos_seri`](https://github.com/scipy/scipy/blob/b882f1b7ebe55e534f29a8d68a54e4ecd30aeb1a/scipy/special/_amos.c#L4178-L4345) + +NOTE: Current implementation uses `@GOTO`! """ function seri!( y::Vector{ComplexF64}, @@ -25,5 +27,213 @@ function seri!( elim::Float64, alim::Float64 ) + @assert length(y) >= n + + w = ComplexF64[ 0.0, 0.0 ] + nz::Int = 0 + az = abs(z) + if az == 0.0 + #= 160 =# + y[1] = 0.0 + 0.0im + if fnu == 0.0 + y[1] = 1.0 + 0.0im + end + #= 170 =# + if n == 1 + return nz + end + + for i in 2:n + y[i] = 0.0 + 0.0im + end + #= 180 =# + return nz + end + + arm = 1.0e3 * D1_MACH[1] + rtr1 = sqrt(arm) + crsc = 1.0 + iflag::Int = 0 + if az >= arm + half_z = 0.5 * z + cz = 0.0 + 0.0im + if az > rtr1 + cz = half_z * half_z + end #= 10 =# + acz = abs(cz) + nn::Int = n + ck = log(half_z) + +@label _line20 + #= 20 =# + dfnu = fnu + (nn - 1) + fnup = dfnu + 1.0 + # + # UNDERFLOW TEST + # + ak1 = ck * dfnu + ak = gammaln(fnup) + ak1 -= ak + if kode == 2 + ak1 -= real(z) + end + rak1 = real(ak1) + if rak1 > (-elim) + @goto _line40 + end + +@label _line30 + #= 30 =# + nz += 1 + y[nn] = 0.0 + if acz > dfnu + #= 190 =# + # + # RETURN WITH NZ.LT.0 IF CABS(Z*Z/4).GT.FNU+N-NZ-1 COMPLETE + # THE CALCULATION IN CBINU WITH N=N-IABS(NZ) + # + return -nz + end + nn -= 1 + if nn == 0 + return nz + end + @goto _line20 + +@label _line40 + #= 40 =# + if rak1 <= -alim + iflag = 1 + ss = 1.0 / tol + crsc = tol + ascle = arm * ss + end + #= 50 =# + ak = imag(ak1) + aa = exp(rak1) + if iflag == 1 + aa *= ss + end + coef = aa * complex(cos(ak), sin(ak)) + atol = tol * acz / fnup + il = min(2, nn) + for i in 1:il + dfnu = fnu + (nn - i) + fnup = dfnu + 1.0 + s1 = 1.0 + 0.0im + if acz >= (tol * fnup) + ak1 = 1.0 + 0.0im + ak = fnup + 2.0 + s = fnup + aa = 2.0 + #= 60 =# + while true + rs = 1.0 / s + ak1 *= cz + ak1 *= rs + s1 += ak1 + s += ak + ak += 2.0 + aa *= acz + aa *= rs + if aa <= atol + break + end + end + end + #= 70 =# + s2 = s1 * coef + w[i] = s2 + if iflag != 0 + if amos_uchk(s2, ascle, tol) + @goto _line30 + end + end + #= 80 =# + m = nn - i + 1 + y[m] = s2 * crsc + if i != il + coef *= dfnu / half_z + end + end + #= 90 =# + if nn <= 2 + return nz + end + + k = nn - 2 + ak = Float64(k) + rz = 2.0 / z + if iflag == 1 + @goto _line120 + end + ib = 3 + +@label _line100 + #= 100 =# + for i in ib:nn + y[k] = (ak + fnu) * rz * y[k + 1] + y[k + 2] + ak -= 1.0 + k -= 1 + end + #= 110 =# + return nz + + # + # RECUR BACKWARD WITH SCALED VALUES + # +@label _line120 + #= 120 =# + # + # EXP(-ALIM)=EXP(-ELIM)/TOL=APPROX. ONE PRECISION ABOVE THE + # UNDERFLOW LIMIT = ASCLE = D1MACH(1)*SS*1.0D+3 + # + s1 = w[1] + s2 = w[2] + for l in 3:nn + ck = s2 + s2 = s1 + (ak + fnu) * rz * s2 + s1 = ck + ck = s2 * crsc + y[k] = ck + ak -= 1.0 + k -= 1 + if abs(ck) > ascle + @goto _line140 + end + end + #= 130 =# + return nz + +@label _line140 + #= 140 =# + ib = l + 1 + if ib > nn + return nz + end + @goto _line100 + end + + + #= 150 =# + nz = n + if fnu == 0.0 + nz -= 1 + end + + #= 160 =# + y[1] = 0.0 + 0.0im + if fnu == 0.0 + y[1] = 1.0 + 0.0im + end + #= 170 =# + if n == 1 + return nz + end + for i in 2:n + y[i] = 0.0 + 0.0im + end + #= 180 =# + return nz end diff --git a/test/amos/helper.jl b/test/amos/helper.jl index 8ada6989..0acf03b6 100644 --- a/test/amos/helper.jl +++ b/test/amos/helper.jl @@ -431,19 +431,19 @@ end y_ref, y_impl = copy(y), copy(y) nz_ref = AMOS._seri!(y_ref, z, fnu, kode, n, tol, elim, alim) - # nz_impl = AMOS.seri!(y_impl, z, fnu, kode, n, tol, elim, alim) + nz_impl = AMOS.seri!(y_impl, z, fnu, kode, n, tol, elim, alim) - # @test nz_ref == nz_impl - # if contains_nan(y_ref, y_impl) - # cmp = all(y_ref .=== y_impl) - # @test cmp - # if !cmp - # @info "contains_nan" y_ref y_impl - # @show y z fnu kode n tol elim alim - # end - # else - # @test y_ref ≈ y_impl - # end + @test nz_ref == nz_impl + if contains_nan(y_ref, y_impl) + cmp = all(y_ref .=== y_impl) + @test cmp + if !cmp + @info "contains_nan" y_ref y_impl + @show y z fnu kode n tol elim alim + end + else + @test y_ref ≈ y_impl + end end test_y = [ From 3e28dab4fc2fdf9aafc2712d38f2e3183cbe54e9 Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Mon, 22 Jan 2024 08:24:12 -0600 Subject: [PATCH 29/34] amos: impl `AMOS.bknu!` --- src/amos/AMOS.jl | 1 + src/amos/subroutines/bknu.jl | 564 +++++++++++++++++++++++++++++++++++ src/amos/warp.jl | 47 +++ test/amos/helper.jl | 62 ++++ 4 files changed, 674 insertions(+) create mode 100644 src/amos/subroutines/bknu.jl diff --git a/src/amos/AMOS.jl b/src/amos/AMOS.jl index e909dfae..5978047e 100644 --- a/src/amos/AMOS.jl +++ b/src/amos/AMOS.jl @@ -13,6 +13,7 @@ _subroutine_names = [ "kscl", "mlri", "seri", + "bknu", # Othsers ] # _subroutine_names diff --git a/src/amos/subroutines/bknu.jl b/src/amos/subroutines/bknu.jl new file mode 100644 index 00000000..3f658110 --- /dev/null +++ b/src/amos/subroutines/bknu.jl @@ -0,0 +1,564 @@ +# SPDX-License-Identifier: BSD-3-Clause OR MIT + +""" + +ZBKNU COMPUTES THE K BESSEL FUNCTION IN THE RIGHT HALF Z PLANE. + +# Impl Ref +- [`openspecfun/amos/zbknu.f`](https://github.com/JuliaMath/openspecfun/blob/v0.5.6/amos/zbknu.f) +- [`scipy/scipy/special/_amos.c:amos_bknu`](https://github.com/scipy/scipy/blob/b882f1b7ebe55e534f29a8d68a54e4ecd30aeb1a/scipy/special/_amos.c#L3005-L3467) + +NOTE: Current implementation uses `@GOTO`! +""" +function bknu!( + y::Vector{ComplexF64}, + z::ComplexF64, + fnu::Float64, + kode::Int, + n::Int, + tol::Float64, + elim::Float64, + alim::Float64 +) + s1 = 0.0 + 0.0im + s2 = 0.0 + 0.0im + ck = 0.0 + 0.0im + dnu2 = 0.0 + + r1 = 2.0 + tth = 2.0 / 3.0 + cc = Float64[ + 5.77215664901532861e-01, -4.20026350340952355e-02, + -4.21977345555443367e-02, 7.21894324666309954e-03, + -2.15241674114950973e-04, -2.01348547807882387e-05, + 1.13302723198169588e-06, 6.11609510448141582e-09 + ] + + caz = abs(z) + cscl = 1.0 / tol + crsc = tol + css = Float64[cscl, 1.0, crsc] + csr = Float64[crsc, 1.0, cscl] + bry = [ + 1e3 * D1_MACH[1] / tol, + tol / (1e3 * D1_MACH[1]), + D1_MACH[2] + ] + nz = 0 + iflag = 0 + koded = kode + rz = 2.0 / z + inu = Int(trunc(fnu + 0.5)) + dnu = fnu - inu + if abs(dnu) != 0.5 + dnu2 = 0.0 + if abs(dnu) > tol + dnu2 = dnu * dnu + end + if caz <= r1 + # + # SERIES FOR CABS(Z).LE.R1 + # + fc = 1.0 + smu = log(rz) + fmu = smu * dnu + csh = sinh(fmu) + cch = cosh(fmu) + if dnu != 0.0 + fc = dnu * pi + fc *= 1.0 / sin(fc) + smu = csh / dnu + end + #= 10 =# + a2 = 1.0 + dnu + # + # GAM(1-Z)*GAM(1+Z)=PI*Z/SIN(PI*Z), T1=1/GAM(1-DNU), T2=1/GAM(1+DNU) + # + t2 = exp(-gammaln(a2)) + t1 = 1.0 / (t2 * fc) + if abs(dnu) <= 0.1 + # + # SERIES FOR F0 TO RESOLVE INDETERMINACY FOR SMALL ABS(DNU) + # + ak = 1.0 + s = cc[1] + for k = 2:8 + ak *= dnu2 + tm = cc[k] * ak + s += tm + if abs(tm) < tol + break + end + end #= 20 =# + #= 30 =# + g1 = -s + else + #= 40 =# + g1 = (t1 - t2) / (dnu + dnu) + end + #= 50 =# + g2 = 0.5 * (t1 + t2) + f = fc * (g1 * cch + smu * g2) + pt = exp(fmu) + p = (0.5 / t2) * pt + q = (0.5 / t1) / pt + s1 = f + s2 = p + ak = 1.0 + a1 = 1.0 + ck = 1.0 + 0.0im + bk = 1.0 - dnu2 + if (inu <= 0) && (n <= 1) + # + # GENERATE K(FNU,Z), 0.0D0 .LE. FNU .LT. 0.5D0 AND N=1 + # + if caz >= tol + cz = z * z * 0.25 + t1 = 0.25 * caz * caz + #= 60 =# + while true + f = (f * ak + p + q) / bk + p = p / (ak - dnu) + q = q / (ak + dnu) + rk = 1.0 / ak + ck *= cz * rk + s1 += ck * f + a1 *= t1 * rk + bk += ak + ak + 1.0 + ak += 1.0 + if a1 <= tol + break + end + end + end + #= 70 =# + y[1] = s1 + if koded == 1 + return nz + end + + y[1] = s1 * exp(z) + return nz + end + + #= 80 =# + # + # GENERATE K(DNU,Z) AND K(DNU+1,Z) FOR FORWARD RECURRENCE + # + if caz >= tol + cz = z * z * 0.25 + t1 = 0.25 * caz * caz + #= 90 =# + while true + f = (f * ak + p + q) / bk + p *= 1.0 / (ak - dnu) + q *= 1.0 / (ak + dnu) + rk = 1.0 / ak + ck *= cz * rk + s1 += ck * f + s2 += ck * (p - f * ak) + a1 *= t1 * rk + bk += ak + ak + 1.0 + ak += 1.0 + if a1 <= tol + break + end + end + end + #= 100 =# + kflag = 2 + a1 = fnu + 1.0 + ak = a1 * abs(real(smu)) + if ak > alim + kflag = 3 + end + p2 = s2 * css[kflag] + s2 = p2 * rz + s1 *= css[kflag] + if koded != 1 + f = exp(z) + s1 *= f + s2 *= f + end + @goto _line210 + end + end + + #= 110 =# + # + # IFLAG=0 MEANS NO UNDERFLOW OCCURRED + # IFLAG=1 MEANS AN UNDERFLOW OCCURRED- COMPUTATION PROCEEDS WITH + # KODED=2 AND A TEST FOR ON SCALE VALUES IS MADE DURING FORWARD + # RECURSION + # + coef = rthpi / sqrt(z) + kflag = 2 + if koded != 2 + if real(z) > alim + #= 290 =# + # + # SCALE BY DEXP(Z), IFLAG = 1 CASES + # + koded = 2 + iflag = 1 + kflag = 2 + else + a1 = exp(-real(z)) * real(css[kflag]) + pt = a1 * complex(cos(imag(z)), -sin(imag(z))) + coef *= pt + end + end + #= 120 =# + if abs(dnu) == 0.5 + #= 300 =# + # + # FNU=HALF ODD INTEGER CASE, DNU=-0.5 + # + s1 = coef + s2 = coef + @goto _line210 + end + + # + # MILLER ALGORITHM FOR CABS(Z).GT.R1 + # + ak = abs(cos(pi * dnu)) + if ak == 0.0 + #= 300 =# + # + # FNU=HALF ODD INTEGER CASE, DNU=-0.5 + # + s1 = coef + s2 = coef + @goto _line210 + end + + fhs = abs(0.25 - dnu2) + if fhs == 0.0 + #= 300 =# + # + # FNU=HALF ODD INTEGER CASE, DNU=-0.5 + # + s1 = coef + s2 = coef + @goto _line210 + end + + # + # COMPUTE R2=F(E). IF ABS(Z) >= R2, USE FORWARD RECURRENCE TO + # DETERMINE THE BACKWARD INDEX K. R2=F(E) IS A STRAIGHT LINE ON + # 12 <= E <= 60. E IS COMPUTED FROM 2**(-E)=B**(1-DIGITS(0.0_dp))= + # TOL WHERE B IS THE BASE OF THE ARITHMETIC. + # + t1 = I1_MACH[14] - 1 + t1 *= D1_MACH[5] * (log(10) / log(2)) + t1 = min(max(t1, 12.0), 60.0) + t2 = tth * t1 - 6.0 + if real(z) == 0.0 + t1 = hpi + else #= 130 =# + t1 = abs(atan(imag(z) / real(z))) + end + #= 140 =# + if t2 <= caz + # + # FORWARD RECURRENCE LOOP WHEN CABS(Z).GE.R2 + # + etest = ak / (pi * caz * tol) + fk = 1.0 + if etest < 1.0 + @goto _line180 + end + + fks = 2.0 + rk = caz + caz + 2.0 + a1 = 0.0 + a2 = 1.0 + for i = 1:kmax + ak = fhs / fks + bk = rk / (fk + 1.0) + tm = a2 + a2 = bk * a2 - ak * a1 + a1 = tm + rk += 2.0 + fks += fk + fk + 2.0 + fhs += fk + fk + fk += 1.0 + tm = abs(a2) * fk + if etest < tm + break + end + if i == kmax + #= 310 =# + return -2 + end + end #= 150 =# + + #= 160 =# + fk += spi * t1 * sqrt(t2 / caz) + fhs = abs(0.25 - dnu2) + else + #= 170 =# + # + # COMPUTE BACKWARD INDEX K FOR CABS(Z).LT.R2 + # + a2 = sqrt(caz) + ak *= fpi / (tol * sqrt(a2)) + aa = 3.0 * t1 / (1.0 + caz) + bb = 14.7 * t1 / (28.0 + caz) + ak = (log(ak) + caz * cos(aa) / (1.0 + 0.008 * caz)) / cos(bb) + fk = 0.12125 * ak * ak / caz + 1.5 + end + +@label _line180 + #= 180 =# + # + # BACKWARD RECURRENCE LOOP FOR MILLER ALGORITHM + # + k = Int(trunc(fk)) + fk = float(k) + fks = fk * fk + p1 = 0.0 + 0.0im + p2 = complex(tol) + cs = p2 + for i = 1:k + a1 = fks - fk + a2 = (fks + fk) / (a1 + fhs) + rk = 2.0 / (fk + 1.0) + t1 = (fk + real(z)) * rk + t2 = imag(z) * rk + pt = p2 + p2 = (p2 * complex(t1, t2) - p1) * a2 + p1 = pt + cs += p2 + fks = a1 - fk + 1.0 + fk -= 1.0 + end + #= 190 =# + + # + # COMPUTE (P2/CS)=(P2/CABS(CS))*(CONJG(CS)/CABS(CS)) + # FOR BETTER SCALING + # + tm = abs(cs) + pt = 1.0 / tm + s1 = pt * p2 + cs = conj(cs) * pt + s1 *= coef * cs + if (inu <= 0) && (n <= 1) + zd = z + if iflag == 1 + @goto _line270 + end + @goto _line240 + end + #= 200 =# + + # + # COMPUTE P1/P2=(P1/CABS(P2)*CONJG(P2)/CABS(P2) FOR SCALING + # + tm = abs(p2) + pt = 1.0 / tm + p1 = pt * p1 + p2 = conj(p2) * pt + pt = p1 * p2 + s2 = s1 * (1.0 + (dnu + 0.5 - pt) / z) + +@label _line210 + #= 210 =# + # + # FORWARD RECURSION ON THE THREE TERM RECURSION WITH RELATION WITH + # SCALING NEAR EXPONENT EXTREMES ON KFLAG=1 OR KFLAG=3 + # + ck = (dnu + 1.0) * rz + if n == 1 + inu -= 1 + end + if inu <= 0 + if n <= 1 + s1 = s2 + end + #= 215 =# + zd = z + if iflag == 1 + @goto _line270 + end + @goto _line240 + end + + #= 220 =# + inub = 1 + if iflag == 1 + @goto _line261 + end + +@label _line225 + #= 225 =# + p1 = csr[kflag] + ascle = bry[kflag] + for i = inub:inu + st = s2 + s2 = ck * s2 + s1 + s1 = st + ck += rz + if kflag < 3 + p2 = s2 * p1 + p2m = max(abs(real(p2)), abs(imag(p2))) + if p2m > ascle + kflag += 1 + ascle = bry[kflag] + s1 *= p1 + s2 = p2 + s1 *= css[kflag] + s2 *= css[kflag] + p1 = csr[kflag] + end + end + end + #= 230 =# + if n == 1 + s1 = s2 + end + +@label _line240 + #= 240 =# + y[1] = s1 * csr[kflag] + if n == 1 + return nz + end + + y[2] = s2 * csr[kflag] + if n == 2 + return nz + end + + kk = 2 +@label _line250 + #= 250 =# + kk += 1 + if kk > n + return nz + end + + p1 = csr[kflag] + ascle = bry[kflag] + for i = kk:n + p2 = s2 + s2 = ck * s2 + s1 + s1 = p2 + ck += rz + p2 = s2 * p1 + y[i] = p2 + if kflag < 3 + p2m = max(abs(real(p2)), abs(imag(p2))) + if p2m > ascle + kflag += 1 + ascle = bry[kflag] + s1 *= p1 + s2 = p2 + s1 *= css[kflag] + s2 *= css[kflag] + p1 = csr[kflag] + end + end + end + #= 260 =# + return nz + +@label _line261 + #= 261 =# + # + # IFLAG=1 CASES, FORWARD RECURRENCE ON SCALED VALUES ON UNDERFLOW + # + elm = exp(-elim) + ascle = bry[1] + zd = z + xd = real(z) + yd = imag(z) + ic = -1 + j = 2 + for i = 1:inu + st = s2 + s2 = ck * s2 + s1 + s1 = st + ck += rz + as = abs(s2) + alas = log(as) + p2r = alas - xd + if p2r >= -elim + p2 = -zd + clog(s2) + p2r = real(p2) + p2i = imag(p2) + p2m = exp(p2r) / tol + p1 = p2m * complex(cos(p2i), sin(p2i)) + if uchk(p1, ascle, tol) + j = 3 - j + cy[j] = p1 + if ic == i - 1 + @goto _line264 + end + ic = i + continue + end + if alas >= 0.5 * elim + xd -= elim; + s1 *= elm; + s2 *= elm; + zd = complex(xd, yd); + end + end + #= 262 =# + end + if n == 1 + s1 = s2 + end + @goto _line270 + +@label _line264 + #= 264 =# + kflag = 1; + inub = i + 1; + s2 = cy[j]; + j = 3 - j; + s1 = cy[j]; + if inub <= inu + @goto _line225 + end + if n == 1 + s1 = s2; + end + @goto _line240; + +@label _line270 + #= 270 =# + y[1] = s1; + if n == 1 + y[2] = s2 + end + #= 280 =# + ascle = bry[1]; + nz = kscl!(y, zd, fnu, n, rz, ascle, tol, elim); + inu = n - nz; + if inu <= 0 + return nz; + end + + kk = nz + 1; + s1 = y[kk-1]; + y[kk-1] = s1 * csr[0]; + if inu == 1 + return nz; + end + + kk = nz + 2; + s2 = y[kk-1]; + y[kk-1] = s2 * csr[0]; + if inu == 2 + return nz; + end + + t2 = fnu + (kk-1); + ck = t2 * rz; + kflag = 1; + @goto _line250; +end diff --git a/src/amos/warp.jl b/src/amos/warp.jl index 359a83fe..25a18a43 100644 --- a/src/amos/warp.jl +++ b/src/amos/warp.jl @@ -257,3 +257,50 @@ function _seri!( nz[] end + + +""" +Will modify `y[]`. + +Return `nz`. +""" +function _bknu!( + y::Vector{ComplexF64}, + z::ComplexF64, + fnu::Float64, + kode::Int, + n::Int, + tol::Float64, + elim::Float64, + alim::Float64 +) + y_len = length(y) + yr = real.(y) + yi = imag.(y) + nz = Ref{Int32}() + + ccall((:zbknu_, libopenspecfun), + Cvoid, + ( + # (ZR,ZI), + Ref{Float64}, Ref{Float64}, + # FNU, KODE, N, + Ref{Float64}, Ref{Int32}, Ref{Int32}, + # (YR,YI), + Ptr{Float64}, Ptr{Float64}, + # NZ, TOL, ELIM, ALIM + Ref{Int32}, Ref{Float64}, Ref{Float64}, Ref{Float64}, + ), + real(z), imag(z), + fnu, kode, n, + yr, yi, + nz, tol, elim, alim + ) + + # Update `y` + y_new = complex.(yr, yi) + update_arr!(y, y_new) + @assert length(y) == y_len + + nz[] +end diff --git a/test/amos/helper.jl b/test/amos/helper.jl index 0acf03b6..747a8e40 100644 --- a/test/amos/helper.jl +++ b/test/amos/helper.jl @@ -477,3 +477,65 @@ end test_seri(y, z, fnu, kode, length(y), 1e-6, eps(), 2*eps()) end end + +# TODO: improve code coverage +@testset "AMOS.bknu!" begin + function test_bknu( + y::Vector{ComplexF64}, + z::ComplexF64, + fnu::Float64, + kode::Int, + n::Int, + tol::Float64, + elim::Float64, + alim::Float64 + ) + y_ref, y_impl = copy(y), copy(y) + + nz_ref = AMOS._bknu!(y_ref, z, fnu, kode, n, tol, elim, alim) + nz_impl = AMOS.bknu!(y_impl, z, fnu, kode, n, tol, elim, alim) + + @test nz_ref == nz_impl + if contains_nan(y_ref, y_impl) + cmp = all(y_ref .=== y_impl) + @test cmp + if !cmp + @info "contains_nan" y_ref y_impl + @show y z fnu kode n tol elim alim + end + else + @test y_ref ≈ y_impl + end + end + + test_y = [ + # n=1 + ComplexF64[0.0], + ComplexF64[1.0], + ComplexF64[pi], + [ rand(ComplexF64, 1) for _ in 1:10 ]..., + # n=2 + ComplexF64[0.0, 0.0], + ComplexF64[1., 1.], + ComplexF64[1., 0.], + ComplexF64[0., 1.], + ComplexF64[1., 2.], + [ rand(ComplexF64, 2) for _ in 1:10 ]..., + # n=5 + ComplexF64[ complex(i,i) for i in 1:5 ], + ] + test_z = [ + SPECIAL_FLOAT32..., + ] |> complex + + for y in test_y, + z in test_z, + fnu in [ 0., 1. ], + kode in [ 1, 2 ] + naninf(z) && continue + z==0.0 && continue # div by zero + abs(z)>typemax(Int) && continue # InexactError: Int64 + + test_bknu(y, z, fnu, kode, length(y), 1e-6, eps(), 2*eps()) + end +end From 3236baa1dbc9f1d5253cfdb1d524c3bd1737fed9 Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Mon, 22 Jan 2024 08:50:39 -0600 Subject: [PATCH 30/34] amos: bknu: add missing var init --- src/amos/subroutines/bknu.jl | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/amos/subroutines/bknu.jl b/src/amos/subroutines/bknu.jl index 3f658110..777c6ff0 100644 --- a/src/amos/subroutines/bknu.jl +++ b/src/amos/subroutines/bknu.jl @@ -20,12 +20,19 @@ function bknu!( elim::Float64, alim::Float64 ) + # TODO move to const.jl + kmax = 30 + r1 = 2.0 + rthpi = 1.25331413731550025 + spi = 1.90985931710274403 + hpi = 1.57079632679489662 + fpi = 1.89769999331517738 + s1 = 0.0 + 0.0im s2 = 0.0 + 0.0im ck = 0.0 + 0.0im dnu2 = 0.0 - r1 = 2.0 tth = 2.0 / 3.0 cc = Float64[ 5.77215664901532861e-01, -4.20026350340952355e-02, From 2734b525aa0b49d399cda9f69dbd2a7f34132bf2 Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Mon, 22 Jan 2024 08:50:57 -0600 Subject: [PATCH 31/34] amos: impl `AMOS.acai!` --- src/amos/AMOS.jl | 1 + src/amos/subroutines/acai.jl | 124 +++++++++++++++++++++++++++++++++++ src/amos/warp.jl | 50 ++++++++++++++ test/amos/helper.jl | 66 +++++++++++++++++++ 4 files changed, 241 insertions(+) create mode 100644 src/amos/subroutines/acai.jl diff --git a/src/amos/AMOS.jl b/src/amos/AMOS.jl index 5978047e..ada046da 100644 --- a/src/amos/AMOS.jl +++ b/src/amos/AMOS.jl @@ -15,6 +15,7 @@ _subroutine_names = [ "seri", "bknu", # Othsers + "acai", ] # _subroutine_names for fname in _subroutine_names diff --git a/src/amos/subroutines/acai.jl b/src/amos/subroutines/acai.jl new file mode 100644 index 00000000..e18307e6 --- /dev/null +++ b/src/amos/subroutines/acai.jl @@ -0,0 +1,124 @@ +# SPDX-License-Identifier: BSD-3-Clause OR MIT + +""" + +# fortran comments +ZACAI APPLIES THE ANALYTIC CONTINUATION FORMULA + + K(FNU,ZN*EXP(MP))=K(FNU,ZN)*EXP(-MP*FNU) - MP*I(FNU,ZN) + MP=PI*MR*CMPLX(0.0,1.0) + +TO CONTINUE THE K FUNCTION FROM THE RIGHT HALF TO THE LEFT +HALF Z PLANE FOR USE WITH ZAIRY WHERE FNU=1/3 OR 2/3 AND N=1. +ZACAI IS THE SAME AS ZACON WITH THE PARTS FOR LARGER ORDERS AND +RECURRENCE REMOVED. A RECURSIVE CALL TO ZACON CAN RESULT IF ZACON +IS CALLED FROM ZAIRY. + +# Impl Ref +- [`openspecfun/amos/zacai.f`](https://github.com/JuliaMath/openspecfun/blob/v0.5.6/amos/zacai.f) +- [`scipy/scipy/special/_amos.c:amos_acai`](https://github.com/scipy/scipy/blob/b882f1b7ebe55e534f29a8d68a54e4ecd30aeb1a/scipy/special/_amos.c#L383-L485) + +NOTE: Current implementation uses `@GOTO`! +""" +function acai!( + y::Vector{ComplexF64}, + z::ComplexF64, + fnu::Float64, + kode::Int, + mr::Int, + n::Int, + rl::Float64, + tol::Float64, + elim::Float64, + alim::Float64 +) + int_nu = Int(trunc(fnu)) + + cy = ComplexF64[ 0.0, 0.0 ] + nz::Int = 0 + zn = -z + az = abs(z) + dfnu = fnu + (n - 1) + if (az > 2.0) && ((az * az * 0.25) > (dfnu + 1.0)) + #= 20 =# + if az >= rl + # + # Asymptotic expansion for large z for the I function + # + nw = asyi!(y, zn, fnu, kode, n, rl, tol, elim, alim) + else + #= 30 =# + # + # Miller algorithm normalized by the series for the I function + # + nw = mlri!(y, zn, fnu, kode, n, tol) + end + if nw < 0 + #= 80 =# + nz = -1 + if nw == -2 + nz = -2 + end + return nz + end + else + #= 10 =# + # + # Power series for the I function + # + seri!(y, zn, fnu, kode, n, tol, elim, alim) + end + + #= 40 =# + # + # Analytic continuation to the left half plane for the K function + # + nw = bknu!(cy, zn, fnu, kode, 1, tol, elim, alim) + if nw != 0 + #= 80 =# + nz = -1 + if nw == -2 + nz = -2 + end + return nz + end + + fmr = mr + sgn = fmr < 0.0 ? pi : -pi + csgn = complex(0.0, sgn) + + if kode != 1 + yy = -imag(zn) + cpn = cos(yy) + spn = sin(yy) + csgn *= complex(cpn, spn) + end + + #= 50 =# + # + # Calculate cspn = exp(fnu*pi*i) to minimize losses of significance + # when fnu is large + # + arg = (fnu - int_nu) * sgn + cpn = cos(arg) + spn = sin(arg) + cspn = complex(cpn, spn) + if isodd(int_nu) + cspn = -cspn + end + + #= 60 =# + c1 = Ref(cy[1]) + c2 = Ref(y[1]) + + if kode != 1 + iuf = 0 + ascle = 1e3 * D1_MACH[1] / tol + nw, iuf = s1s2!(zn, c1, c2, ascle, alim, iuf) + nz += nw + end + + #= 70 =# + y[1] = cspn * c1[] + csgn * c2[] + return nz +end diff --git a/src/amos/warp.jl b/src/amos/warp.jl index 25a18a43..5fd9314f 100644 --- a/src/amos/warp.jl +++ b/src/amos/warp.jl @@ -304,3 +304,53 @@ function _bknu!( nz[] end + + +""" +Will modify `y[]`. + +Return `nz`. +""" +function _acai!( + y::Vector{ComplexF64}, + z::ComplexF64, + fnu::Float64, + kode::Int, + mr::Int, + n::Int, + rl::Float64, + tol::Float64, + elim::Float64, + alim::Float64 +) + y_len = length(y) + yr = real.(y) + yi = imag.(y) + nz = Ref{Int32}() + + ccall((:zacai_, libopenspecfun), + Cvoid, + ( + # (ZR,ZI), + Ref{Float64}, Ref{Float64}, + # FNU, KODE, MR, N, + Ref{Float64}, Ref{Int32}, Ref{Int32}, Ref{Int32}, + # (YR,YI), + Ptr{Float64}, Ptr{Float64}, + # NZ, RL, TOL, ELIM, ALIM + Ref{Int32}, Ref{Float64}, Ref{Float64}, + Ref{Float64}, Ref{Float64} + ), + real(z), imag(z), + fnu, kode, mr, n, + yr, yi, + nz, rl, tol, elim, alim + ) + + # Update `y` + y_new = complex.(yr, yi) + update_arr!(y, y_new) + @assert length(y) == y_len + + nz[] +end diff --git a/test/amos/helper.jl b/test/amos/helper.jl index 747a8e40..15b42d2d 100644 --- a/test/amos/helper.jl +++ b/test/amos/helper.jl @@ -539,3 +539,69 @@ end test_bknu(y, z, fnu, kode, length(y), 1e-6, eps(), 2*eps()) end end + + +@testset "AMOS.acai!" begin + function test_acai( + y::Vector{ComplexF64}, + z::ComplexF64, + fnu::Float64, + kode::Int, + mr::Int, + n::Int, + rl::Float64, + tol::Float64, + elim::Float64, + alim::Float64 + ) + y_ref, y_impl = copy(y), copy(y) + + nz_ref = AMOS._acai!(y_ref, z, fnu, kode, mr, n, rl, tol, elim, alim) + nz_impl = AMOS.acai!(y_impl, z, fnu, kode, mr, n, rl, tol, elim, alim) + + if contains_nan(y_ref, y_impl) + cmp = all(y_ref .=== y_impl) + if !cmp + # @info "contains_nan" y_ref y_impl + # @show y z fnu kode mr n rl tol elim alim + @test_broken cmp + else + @test nz_ref == nz_impl + @test cmp + end + else + @test nz_ref == nz_impl + @test y_ref ≈ y_impl + end + end + + test_y = [ + # n=1 + ComplexF64[0.0], + ComplexF64[1.0], + ComplexF64[pi], + [ rand(ComplexF64, 1) for _ in 1:10 ]..., + # n=2 + ComplexF64[0.0, 0.0], + ComplexF64[1., 1.], + ComplexF64[1., 0.], + ComplexF64[0., 1.], + ComplexF64[1., 2.], + [ rand(ComplexF64, 2) for _ in 1:10 ]..., + # n=5 + ComplexF64[ complex(i,i) for i in 1:5 ], + ] + test_z = [ + SPECIAL_FLOAT32..., + ] |> complex + + for y in test_y, + z in test_z, + fnu in [ 0., 1. ], + kode in [ 1, 2 ] + naninf(z) && continue + z==0.0 && continue # div by zero + + test_acai(y, z, fnu, kode, 1, length(y), 3.1, 1e-6, eps(), 2*eps()) + end +end From 222fa448b3ca078c6dc013246fae25e60280eb06 Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Mon, 22 Jan 2024 14:30:53 -0600 Subject: [PATCH 32/34] BUG:amos: fix typo in `bknu` --- src/amos/subroutines/bknu.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/amos/subroutines/bknu.jl b/src/amos/subroutines/bknu.jl index 777c6ff0..bcc8ca96 100644 --- a/src/amos/subroutines/bknu.jl +++ b/src/amos/subroutines/bknu.jl @@ -539,7 +539,7 @@ function bknu!( @label _line270 #= 270 =# y[1] = s1; - if n == 1 + if n != 1 y[2] = s2 end #= 280 =# @@ -551,15 +551,15 @@ function bknu!( end kk = nz + 1; - s1 = y[kk-1]; - y[kk-1] = s1 * csr[0]; + s1 = y[kk]; + y[kk] = s1 * csr[1]; if inu == 1 return nz; end kk = nz + 2; - s2 = y[kk-1]; - y[kk-1] = s2 * csr[0]; + s2 = y[kk]; + y[kk] = s2 * csr[1]; if inu == 2 return nz; end From b4f18d80c754549891045404c72502de2e6c6eee Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Mon, 22 Jan 2024 14:31:17 -0600 Subject: [PATCH 33/34] amos: impl `AMOS.airy` --- src/amos/AMOS.jl | 3 + src/amos/airy.jl | 317 ++++++++++++++++++++++++++++++++++++++++++++ src/amos/warp.jl | 5 + test/amos/helper.jl | 51 +++++++ 4 files changed, 376 insertions(+) create mode 100644 src/amos/airy.jl diff --git a/src/amos/AMOS.jl b/src/amos/AMOS.jl index ada046da..f5f00e98 100644 --- a/src/amos/AMOS.jl +++ b/src/amos/AMOS.jl @@ -22,4 +22,7 @@ for fname in _subroutine_names include(joinpath("subroutines", "$(fname).jl")) end +# AMOS API +include("airy.jl") + end # AMOS diff --git a/src/amos/airy.jl b/src/amos/airy.jl new file mode 100644 index 00000000..5435e2df --- /dev/null +++ b/src/amos/airy.jl @@ -0,0 +1,317 @@ +# SPDX-License-Identifier: BSD-3-Clause OR MIT + +""" + +COMPUTE AIRY FUNCTIONS AI(Z) AND DAI(Z) FOR COMPLEX Z + +# parameters +- `z` Complex Number +- `id`: ORDER OF DERIVATIVE + id=0, or id=1 +- `kode`: INDICATE THE SCALING OPTION + kode=1, + AI=AI(Z) ON ID=0 OR + AI=DAI(Z)/DZ ON ID=1 + kode=2, + AI=CEXP(ZTA)*AI(Z) ON ID=0 OR + AI=CEXP(ZTA)*DAI(Z)/DZ ON ID=1 WHERE + ZTA=(2/3)*Z*CSQRT(Z) + +# Impl Ref +- [`openspecfun/amos/zairy.f`](https://github.com/JuliaMath/openspecfun/blob/v0.5.6/amos/zairy.f) +- [`scipy/scipy/special/_amos.c:amos_airy`](https://github.com/scipy/scipy/blob/b882f1b7ebe55e534f29a8d68a54e4ecd30aeb1a/scipy/special/_amos.c#L650-L994) +""" +function airy(z::ComplexF64, id::Int, kode::Int) + # aa, ad, ak, alim, atrm, az, az3, bk, ck, dig, dk, d1, d2, elim, fid, fnu, rl, r1m5, sfac, tol, bb, alaz = 0.0 + TTH = 2.0 / 3.0 + GAMMA_C1 = 0.35502805388781723926 # 1/(Gamma(2/3) * 3**(2/3)) + GAMMA_C2 = 0.25881940379280679841 # 1/(Gamma(1/3) * 3**(1/3)) + COEF = 0.18377629847393068317 # 1 / (sqrt(3) * PI) + + cy = [ 0.0 + 0.0im ] + zta = 0.0 + 0.0im + csq = 0.0 + 0.0im + + # TODO: return or throw + ierr = 0 + nz = 0 + ai = 0.0 + 0.0im + if (id < 0) || (id > 1) + ierr = 1 + end + if (kode < 1) || (kode > 2) + ierr = 1 + end + if ierr != 0 + # TODO: throw(AmosException(ierr) + return 0.0 + 0.0im + end + + az = abs(z) + tol = max(D1_MACH[4], 1e-18) + fid = float(id) + + if az <= 1.0 + # + # POWER SERIES FOR ABS(Z) <= 1. + # + s1 = 1.0 + 0.0im + s2 = 1.0 + 0.0im + if az < tol + #= 170 =# + aa = 1.0e3 * D1_MACH[1] + s1 = 0.0 + 0.0im + if id != 1 + if az > aa + s1 = GAMMA_C2 * z + end + #= 180 =# + ai = GAMMA_C1 - s1 + # @info "180: ai" nz ierr + # @show csq s1 ai + return ai + end + + #= 190 =# + ai = complex(-GAMMA_C2) + aa = sqrt(aa) + if az > aa + s1 = z * z * 0.5 + end + #= 200 =# + ai += s1 * GAMMA_C1 + # @info "200: ai" nz ierr + # @show csq s1 ai + return ai + end + + aa = az * az + if aa >= (tol / az) + trm1 = 1.0 + 0.0im + trm2 = 1.0 + 0.0im + atrm = 1.0 + z3 = z * z * z + az3 = az * aa + ak = 2.0 + fid + bk = 3.0 - fid - fid + ck = 4.0 - fid + dk = 3.0 + fid + fid + d1 = ak * dk + d2 = bk * ck + ad = min(d1, d2) + ak = 24.0 + 9.0 * fid + bk = 30.0 - 9.0 * fid + for k = 1:25 + trm1 *= z3 / d1 + s1 += trm1 + trm2 *= z3 / d2 + s2 += trm2 + atrm *= az3 / ad + d1 += ak + d2 += bk + ad = (d1 > d2 ? d2 : d1) + if atrm < tol * ad + break + end + ak += 18.0 + bk += 18.0 + end + end #= 30 =# + #= 40 =# + if id != 1 + ai = s1*GAMMA_C1 - z*s2*GAMMA_C2 + if kode == 1 + return ai + end + + zta = z * sqrt(z) * TTH + ai *= exp(zta) + # @info "40: ai" nz ierr + # @show zta + return ai + end + #= 50 =# + ai = -s2 * GAMMA_C2 + if az > tol + ai += z * z * s1 * GAMMA_C1 / (1.0 + fid) + end + if kode == 1 + return ai + end + + zta = z * sqrt(z) * TTH + # @info "50: ai * exp(zta)" nz ierr + # @show csq s1 ai + return ai * exp(zta) + end + + #= 70 =# + # + # CASE FOR CABS(Z).GT.1.0 + # + fnu = (1.0 + fid) / 3.0 + # + # SET PARAMETERS RELATED TO MACHINE CONSTANTS. + # TOL IS THE APPROXIMATE UNIT ROUNDOFF LIMITED TO 1.0E-18. + # ELIM IS THE APPROXIMATE EXPONENTIAL OVER- AND UNDERFLOW LIMIT. + # EXP(-ELIM) < EXP(-ALIM)=EXP(-ELIM)/TOL AND + # EXP(ELIM) > EXP(ALIM)=EXP(ELIM)*TOL ARE INTERVALS NEAR + # UNDERFLOW AND OVERFLOW LIMITS WHERE SCALED ARITHMETIC IS DONE. + # RL IS THE LOWER BOUNDARY OF THE ASYMPTOTIC EXPANSION FOR LARGE Z. + # DIG = NUMBER OF BASE 10 DIGITS IN TOL = 10**(-DIG). + # + k1 = trunc(Int, I1_MACH[15]) + k2 = trunc(Int, I1_MACH[16]) + r1m5 = D1_MACH[5] + k = min(abs(k1), abs(k2)) + elim = 2.303 * (k * r1m5 - 3.0) + k1 = trunc(Int, I1_MACH[14]) - 1 + aa = r1m5 * k1 + dig = min(aa, 18.0) + aa *= 2.303 + alim = elim + max(-aa, -41.45) + rl = 1.2 * dig + 3.0 + alaz = log(az) + + # + # TEST FOR PROPER RANGE + # + aa = 0.5 / tol + bb = I1_MACH[9] * 0.5 + aa = min(aa, bb) + aa = aa^(TTH) + if az > aa + #= 260 =# + ierr = 4 + nz = 0 + # TODO: throw(AmosException(ierr) + # @info "260: 0.0im" nz ierr + # @show az aa + return 0.0 + 0.0im + end + + aa = sqrt(aa) + if az > aa + ierr = 3 + end + csq = sqrt(z) + zta = z * csq * TTH + + # + # RE(ZTA).LE.0 WHEN RE(Z).LT.0, ESPECIALLY WHEN IM(Z) IS SMALL + # + iflag = 0 + sfac = 1.0 + ak = imag(zta) + if real(z) < 0.0 + bk = real(zta) + ck = -abs(bk) + zta = complex(ck, ak) + end + #= 80 =# + if (imag(z) == 0.0) && (real(z) <= 0.0) + zta = complex(0.0, ak) + end + #= 90 =# + aa = real(zta) + if (aa < 0.0) || (real(z) <= 0.0) + if kode != 2 + # + # OVERFLOW TEST + # + if aa <= -alim + aa = -aa + 0.25 * alaz + iflag = 1 + sfac = tol + if aa > elim + #= 270 =# + nz = 0 + ierr = 2 + # TODO: throw(AmosException(ierr) + # @info "90: ai" nz ierr iflag + # @show aa sfac + return ai + end + end + end + + #= 100 =# + # + # CBKNU AND CACON RETURN EXP(ZTA)*K(FNU,ZTA) ON KODE=2 + # + mr = 1 + if imag(z) < 0.0 + mr = -1 + end + nn = acai!(cy, zta, fnu, kode, mr, 1, rl, tol, elim, alim) + @assert length(cy) == 1 + if nn < 0 + #= 280 =# + if nn == -1 + nz = 1 + # @info "280: 0.0" nz ierr iflag + return 0.0 + else + nz = 0 + ierr = 5 + # TODO: throw(AmosException(ierr) + # @info "208-else: 0.0" nz ierr iflag + return 0.0 + end + end + + nz += nn + else + #= 110 =# + if kode != 2 + # + # OVERFLOW TEST + # + if aa >= alim + aa = -aa - 0.25 * alaz + iflag = 2 + sfac = 1.0 / tol + if aa < -elim + nz = 1 + # @info "110: 0.0" nz ierr iflag + return 0.0 + end + end + end + #= 120 =# + nz = bknu!(cy, zta, fnu, kode, 1, tol, elim, alim) + @assert length(cy) == 1 + end + + #= 130 =# + s1 = cy[1] * COEF + + # 0: normal; 3: underflow + @assert ierr==0 || ierr==3 + if iflag == 0 + if id != 1 + # @info "130: csq * s1" nz ierr iflag + # @show csq s1 + return csq * s1 + end + #= 140 =# + # @info "140: csq * s1" nz ierr iflag + # @show csq s1 + return (-z * s1) + end + + #= 150 =# + s1 *= sfac + if id != 1 + s1 *= csq + # @info "150: s1 / sfac" nz ierr iflag + # @show csq s1 + return (s1 / sfac) + end + + #= 160 =# + s1 *= -z + # @info "160: s1 / sfac" nz ierr iflag + # @show csq s1 + return (s1 / sfac) +end diff --git a/src/amos/warp.jl b/src/amos/warp.jl index 5fd9314f..d0503483 100644 --- a/src/amos/warp.jl +++ b/src/amos/warp.jl @@ -354,3 +354,8 @@ function _acai!( nz[] end + +# +# `_airy(z::Complex{Float64}, id::Int32, kode::Int32)` +# defined in SpecialFunctions +# diff --git a/test/amos/helper.jl b/test/amos/helper.jl index 15b42d2d..e1674da3 100644 --- a/test/amos/helper.jl +++ b/test/amos/helper.jl @@ -605,3 +605,54 @@ end test_acai(y, z, fnu, kode, 1, length(y), 3.1, 1e-6, eps(), 2*eps()) end end + + +@testset "AMOS.airy" begin + function test_airy( + z::ComplexF64, + id::Int, + kode::Int, + ) + y_ref = SpecialFunctions._airy(z, Int32(id), Int32(kode)) + y_impl = AMOS.airy(z, id, kode) + + if contains_nan(y_ref, y_impl) + cmp = all(y_ref .=== y_impl) + @test cmp + if !cmp + @info "contains_nan" y_ref y_impl + @show z id kode + end + else + @test y_ref ≈ y_impl + if !isapprox(y_ref, y_impl) + @info "!isapprox" y_ref y_impl + @show z id kode + end + end + end + + test_z = Float64[ + SPECIAL_FLOAT32..., + [ rand() for _ in 1:10 ]..., + pi, + [ 10.0^i for i in 1:6 ]..., + ] |> complex + test_z = [ + test_z..., + [ rand(ComplexF64) for _ in 1:10 ]..., + + 0.1 + 0.2im, + 3.14 + 2.7im, + 2.7 + 3.14im + ] + + for z in gen_phase4(test_z), + id in [ 0, 1 ], + kode in [ 1, 2 ] + isnan(z) && continue # Int64(NaN) + abs(z)>1.3e6 && continue # too large arg + + test_airy(z, id, kode) + end +end From c18e324dfeb1eba6060131d6d9c7829d14f73daf Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Mon, 22 Jan 2024 14:32:48 -0600 Subject: [PATCH 34/34] amos: todo add more tests --- test/amos/helper.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/amos/helper.jl b/test/amos/helper.jl index e1674da3..c9ad855a 100644 --- a/test/amos/helper.jl +++ b/test/amos/helper.jl @@ -647,7 +647,8 @@ end 2.7 + 3.14im ] - for z in gen_phase4(test_z), + # TODO: gen_phase4 + for z in (test_z), id in [ 0, 1 ], kode in [ 1, 2 ] isnan(z) && continue # Int64(NaN)