From cc4cb36ab23ef95426a02d33583f883d7ab07831 Mon Sep 17 00:00:00 2001 From: wheeheee <104880306+wheeheee@users.noreply.github.com> Date: Sun, 15 Dec 2024 00:27:30 +0800 Subject: [PATCH 1/6] start moving to convolutions.jl move nextfastfft to convolutions.jl --- ext/OffsetArraysExt.jl | 2 +- src/DSP.jl | 12 +- src/Filters/Filters.jl | 3 +- src/convolutions.jl | 685 +++++++++++++++++++++++++++++++++++++++++ src/dspbase.jl | 648 +------------------------------------- src/util.jl | 32 +- test/dsp.jl | 3 +- 7 files changed, 698 insertions(+), 687 deletions(-) create mode 100644 src/convolutions.jl diff --git a/ext/OffsetArraysExt.jl b/ext/OffsetArraysExt.jl index 66b1dee67..3b6a981eb 100644 --- a/ext/OffsetArraysExt.jl +++ b/ext/OffsetArraysExt.jl @@ -2,6 +2,6 @@ module OffsetArraysExt import DSP import OffsetArrays -DSP.conv_axis_with_offset(::OffsetArrays.IdOffsetRange) = true +DSP.Convolutions.conv_axis_with_offset(::OffsetArrays.IdOffsetRange) = true end diff --git a/src/DSP.jl b/src/DSP.jl index f92ac10d4..86cd90624 100644 --- a/src/DSP.jl +++ b/src/DSP.jl @@ -1,19 +1,19 @@ module DSP -using FFTW -using LinearAlgebra: Transpose, mul!, rmul! -using IterTools: subsets +using LinearAlgebra: mul! -export conv, conv!, deconv, filt, filt!, xcorr +export deconv, filt, filt!, xcorr # This function has methods added in `periodograms` but is not exported, # so we define it here so one can do `DSP.allocate_output` instead of # `DSP.Periodograms.allocate_output`. function allocate_output end -include("dspbase.jl") +include("convolutions.jl") +include("dspbase.jl") include("util.jl") + include("unwrap.jl") include("windows.jl") include("periodograms.jl") @@ -23,7 +23,7 @@ include("estimation.jl") include("diric.jl") using Reexport -@reexport using .Util, .Windows, .Periodograms, .Filters, .LPC, .Unwrap, .Estimation +@reexport using .Util, .Windows, .Periodograms, .Filters, .LPC, .Unwrap, .Estimation, .Convolutions include("deprecated.jl") end diff --git a/src/Filters/Filters.jl b/src/Filters/Filters.jl index 557f4581b..69910d830 100644 --- a/src/Filters/Filters.jl +++ b/src/Filters/Filters.jl @@ -7,7 +7,8 @@ import Base: * using LinearAlgebra: I, mul!, rmul! using Statistics: middle using SpecialFunctions: ellipk -using ..DSP: optimalfftfiltlength, os_fft_complexity, SMALL_FILT_CUTOFF +using ..Convolutions: optimalfftfiltlength, os_fft_complexity +using ..DSP: SMALL_FILT_CUTOFF import ..DSP: filt, filt! using FFTW diff --git a/src/convolutions.jl b/src/convolutions.jl new file mode 100644 index 000000000..cfd12ddf4 --- /dev/null +++ b/src/convolutions.jl @@ -0,0 +1,685 @@ +module Convolutions + +using LinearAlgebra: Transpose, mul! +using IterTools: subsets +using FFTW + +export conv, conv!, nextfastfft + +# Get next fast FFT size for a given signal length +const FAST_FFT_SIZES = (2, 3, 5, 7) + +""" + nextfastfft(n::Integer) + nextfastfft(ns::Tuple) + +Return an estimate for the padded size of an array that +is optimal for the performance of an n-dimensional FFT. + +For `Tuple` inputs, this returns a `Tuple` that is the size +of the padded array. + +For `Integer` inputs, this returns the closest product of 2, 3, 5, and 7 +greater than or equal to `n`. +FFTW contains optimized kernels for these sizes and computes +Fourier transforms of inputs that are a product of these +sizes faster than for inputs of other sizes. + +!!! warning + + Currently, `nextfastfft(ns::Tuple)` is implemented as + `nextfastfft.(ns)`. This may change in future releases if + a better estimation model is found. + + It is recommended to use `nextfastfft.(ns)` to obtain + a tuple of padded lengths for 1D FFTs. +""" +nextfastfft(n::Integer) = nextprod(FAST_FFT_SIZES, n) +nextfastfft(ns::Tuple{Vararg{Integer}}) = nextfastfft.(ns) + + +""" + _zeropad!(padded::AbstractVector, + u::AbstractVector, + padded_axes = axes(padded), + data_dest::Tuple = (1,), + data_region = CartesianIndices(u)) + +Place the portion of `u` specified by `data_region` into `padded`, starting at +location `data_dest`. Sets the rest of `padded` to zero. This will mutate +`padded`. `padded_axes` must correspond to the axes of `padded`. + +""" +@inline function _zeropad!( + padded::AbstractVector, + u::AbstractVector, + padded_axes = axes(padded), + data_dest::Tuple = (first(padded_axes[1]),), + data_region = CartesianIndices(u), +) + datasize = length(data_region) + # Use axes to accommodate arrays that do not start at index 1 + data_first_i = first(data_region)[1] + dest_first_i = data_dest[1] + copyto!(padded, dest_first_i, u, data_first_i, datasize) + padded[first(padded_axes[1]):dest_first_i - 1] .= 0 + padded[dest_first_i + datasize : end] .= 0 + + padded +end + +@inline function _zeropad!( + padded::AbstractArray, + u::AbstractArray, + padded_axes = axes(padded), + data_dest::Tuple = first.(padded_axes), + data_region = CartesianIndices(u), +) + fill!(padded, zero(eltype(padded))) + dest_axes = UnitRange.(data_dest, data_dest .+ size(data_region) .- 1) + dest_region = CartesianIndices(dest_axes) + copyto!(padded, dest_region, u, data_region) + + padded +end + +""" + _zeropad(u, padded_size, [data_dest, data_region]) + +Creates and returns a new base-1 index array of size `padded_size`, with the +section of `u` specified by `data_region` copied into the region of the new + array as specified by `data_dest`. All other values will be initialized to + zero. + +If either `data_dest` or `data_region` is not specified, then the defaults +described in [`_zeropad!`](@ref) will be used. +""" +function _zeropad(u, padded_size, args...) + padded = similar(u, padded_size) + _zeropad!(padded, u, axes(padded), args...) +end + +function _zeropad_keep_offset(u, padded_size, u_axes, args...) + ax_starts = first.(u_axes) + new_axes = UnitRange.(ax_starts, ax_starts .+ padded_size .- 1) + _zeropad!(similar(u, new_axes), u, args...) +end + +function _zeropad_keep_offset( + u, padded_size, ::NTuple{<:Any,Base.OneTo{Int}}, args... +) + _zeropad(u, padded_size, args...) +end + +""" + _zeropad_keep_offset(u, padded_size, [data_dest, dat_region]) + +Like [`_zeropad`](@ref), but retains the first index of `u` when creating a new +array. +""" +function _zeropad_keep_offset(u, padded_size, args...) + _zeropad_keep_offset(u, padded_size, axes(u), args...) +end + +""" +Estimate the number of floating point multiplications per output sample for an +overlap-save algorithm with fft size `nfft`, and filter size `nb`. +""" +os_fft_complexity(nfft, nb) = (nfft * log2(nfft) + nfft) / (nfft - nb + 1) + +""" +Determine the length of FFT that minimizes the number of multiplications per +output sample for an overlap-save convolution of vectors of size `nb` and `nx`. +""" +function optimalfftfiltlength(nb, nx) + nfull = nb + nx - 1 + + # Step through possible nffts and find the nfft that minimizes complexity + # Assumes that complexity is convex + first_pow2 = ceil(Int, log2(nb)) + max_pow2 = ceil(Int, log2(nfull)) + prev_complexity = os_fft_complexity(2^first_pow2, nb) + pow2 = first_pow2 + 1 + while pow2 <= max_pow2 + new_complexity = os_fft_complexity(2^pow2, nb) + new_complexity > prev_complexity && break + prev_complexity = new_complexity + pow2 += 1 + end + nfft = pow2 > max_pow2 ? 2 ^ max_pow2 : 2 ^ (pow2 - 1) + + if nfft > nfull + # If nfft > nfull, then it's better to find next fast power + nfft = nextfastfft(nfull) + end + + nfft +end + +""" +Prepare buffers and FFTW plans for convolution. The two buffers, tdbuff and +fdbuff may be an alias of each other, because complex convolution only requires +one buffer. The plans are mutating where possible, and the inverse plan is +unnormalized. +""" +@inline function os_prepare_conv(u::AbstractArray{T,N}, nffts) where {T<:Real,N} + tdbuff = similar(u, nffts) + bufsize = ntuple(i -> i == 1 ? nffts[i] >> 1 + 1 : nffts[i], N) + fdbuff = similar(u, Complex{T}, NTuple{N,Int}(bufsize)) + + p = plan_rfft(tdbuff) + ip = plan_brfft(fdbuff, nffts[1]) + + tdbuff, fdbuff, p, ip +end + +@inline function os_prepare_conv(u::AbstractArray{<:Complex}, nffts) + buff = similar(u, nffts) + + p = plan_fft!(buff) + ip = inv(p).p + + buff, buff, p, ip # Only one buffer for complex +end + +""" +Transform the smaller convolution input to frequency domain, and return it in a +new array. However, the contents of `buff` may be modified. +""" +@inline function os_filter_transform!(buff::AbstractArray{<:Real}, p) + p * buff +end + +@inline function os_filter_transform!(buff::AbstractArray{<:Complex}, p!) + copy(p! * buff) # p operates in place on buff +end + +""" +Take a block of data, and convolve it with the smaller convolution input. This +may modify the contents of `tdbuff` and `fdbuff`, and the result will be in +`tdbuff`. +""" +@inline function os_conv_block!(tdbuff::AbstractArray{<:Real}, + fdbuff::AbstractArray, + filter_fd, + p, + ip) + mul!(fdbuff, p, tdbuff) + fdbuff .*= filter_fd + mul!(tdbuff, ip, fdbuff) +end + +"Like the real version, but only operates on one buffer" +@inline function os_conv_block!(buff::AbstractArray{<:Complex}, + ::AbstractArray, # Only one buffer for complex + filter_fd, + p!, + ip!) + p! * buff # p! operates in place on buff + buff .*= filter_fd + ip! * buff # ip! operates in place on buff +end + +# Used by `unsafe_conv_kern_os!` to handle blocks of input data that need to be padded. +# +# For a given number of edge dimensions, convolve all regions along the +# perimeter that have that number of edge dimensions +# +# For a 3d cube, if n_edges = 1, this means all faces. If n_edges = 2, then +# all edges. Finally, if n_edges = 3, then all corners. +# +# This needs to be a separate function for subsets to generate tuple elements, +# which is only the case if the number of edges is a Val{n} type. Iterating over +# the number of edges with Val{n} is inherently type unstable, so this function +# boundary allows dispatch to make efficient code for each number of edge +# dimensions. +function unsafe_conv_kern_os_edge!( + # These arrays and buffers will be mutated + out::AbstractArray{<:Any,N}, + tdbuff, + fdbuff, + perimeter_range, + # Number of edge dimensions to pad and convolve + n_edges::Val, + # Data to be convolved + u, + filter_fd, + # FFTW plans + p, + ip, + # Sizes, ranges, and other pre-calculated constants + # + ## ranges listing center and edge blocks + edge_ranges, + center_block_ranges, + ## size and axis information + all_dims, # 1:N + su, + u_start, + sv, + nffts, + out_start, + out_stop, + save_blocksize, + sout_deficit, # How many output samples are missing if nffts > sout + tdbuff_axes, +) where N + # Iterate over all possible combinations of edge dimensions for a number of + # edges + # + # For a 3d cube with n_edges = 1, this will specify the top and bottom faces + # (edge_dims = (1,)), then the left and right faces (edge_dims = (2,)), then + # the front and back faces (edge_dims = (3,)) + for edge_dims in subsets(all_dims, n_edges) + # Specify a region on the perimeter by combining an edge block index for + # each dimension on an edge, and the central blocks for dimensions not + # on an edge. + # + # First make all entries equal to the center blocks: + copyto!(perimeter_range, 1, center_block_ranges, 1, N) + + # For the dimensions chosen to be on an edge (edge_dims), get the + # ranges of the blocks that would need to be padded (lie on an edge) + # in that dimension. + # + # There can be one to two such ranges for each dimension, because with + # some inputs sizes the whole convolution is just one range + # (one edge block), or the padding will be needed at both the leading + # and trailing side of that dimension + selected_edge_ranges = getindex.(Ref(edge_ranges), edge_dims) + + # Visit each combination of edge ranges for the edge dimensions chosen. + # For a 3d cube with n_edges = 1 and edge_dims = (1,), this will visit + # the top face, and then the bottom face. + for perimeter_edge_ranges in Iterators.ProductIterator(selected_edge_ranges) + # The center region for non-edge dimensions has been specified above, + # so finish specifying the region of the perimeter for this edge + # block + for (i, dim) in enumerate(edge_dims) + perimeter_range[dim] = perimeter_edge_ranges[i] + end + + # Region of block indices, not data indices! + block_region = CartesianIndices( + NTuple{N,UnitRange{Int}}(perimeter_range) + ) + for block_pos in block_region + # Figure out which portion of the input data should be transformed + + block_idx = convert(NTuple{N,Int}, block_pos) + ## data_offset is NOT the beginning of the region that will be + ## convolved, but is instead the beginning of the unaliased data. + data_offset = save_blocksize .* (block_idx .- 1) + ## How many zeros will need to be added before the data + pad_before = max.(0, sv .- data_offset .- 1) + data_ideal_stop = data_offset .+ save_blocksize + ## How many zeros will need to be added after the data + pad_after = max.(0, data_ideal_stop .- su) + + ## Data indices, not block indices + data_region = CartesianIndices( + UnitRange.( + u_start .+ data_offset .- sv .+ pad_before .+ 1, + u_start .+ data_ideal_stop .- pad_after .- 1 + ) + ) + + # Convolve portion of input + + _zeropad!(tdbuff, u, tdbuff_axes, pad_before .+ 1, data_region) + os_conv_block!(tdbuff, fdbuff, filter_fd, p, ip) + + # Save convolved result to output + + block_out_stop = min.( + out_start .+ data_offset .+ save_blocksize .- 1, + out_stop + ) + block_out_region = CartesianIndices( + UnitRange.(out_start .+ data_offset, block_out_stop) + ) + ## If the input could not fill tdbuff, account for that before + ## copying the convolution result to the output + u_deficit = max.(0, pad_after .- sv .+ 1) + valid_buff_region = CartesianIndices( + UnitRange.(sv, nffts .- u_deficit .- sout_deficit) + ) + copyto!(out, block_out_region, tdbuff, valid_buff_region) + end + end + end +end + +# Assumes u is larger than, or the same size as, v +# nfft should be greater than or equal to 2*sv-1 +function unsafe_conv_kern_os!(out, + output_indices, + u::AbstractArray{<:Any,N}, + v, + nffts) where N + sout = size(out) + su = size(u) + sv = size(v) + u_start = first.(axes(u)) + out_start = Tuple(first(output_indices)) + out_stop = Tuple(last(output_indices)) + ideal_save_blocksize = nffts .- sv .+ 1 + # Number of samples that are "missing" if the output is smaller than the + # valid portion of the convolution + sout_deficit = max.(0, ideal_save_blocksize .- sout) + # Size of the valid portion of the convolution result + save_blocksize = ideal_save_blocksize .- sout_deficit + nblocks = cld.(sout, save_blocksize) + + # Pre-allocation + tdbuff, fdbuff, p, ip = os_prepare_conv(out, nffts) + tdbuff_axes = axes(tdbuff) + + # Transform the smaller filter + _zeropad!(tdbuff, v) + filter_fd = os_filter_transform!(tdbuff, p) + filter_fd .*= 1 / prod(nffts) # Normalize once for brfft + + # block indices for center blocks, which need no padding + first_center_blocks = cld.(sv .- 1, save_blocksize) .+ 1 + last_center_blocks = fld.(su, save_blocksize) + center_block_ranges = UnitRange.(first_center_blocks, last_center_blocks) + + # block index ranges for blocks that need to be padded + # Corresponds to the leading and trailing side of a dimension, or if there + # are no center blocks, corresponds to the whole dimension + edge_ranges = map(nblocks, first_center_blocks, last_center_blocks) do nblock, firstfull, lastfull + lastfull > 1 ? [1:firstfull-1, lastfull+1:nblock] : [1:nblock] + end + all_dims = 1:N + val_dims = ntuple(Val, Val(N)) + # Buffer to store ranges of indices for a single region of the perimeter + perimeter_range = Vector{UnitRange{Int}}(undef, N) + + # Convolve all blocks that require padding. + # + # This is accomplished by dividing the perimeter of the volume into + # subsections, where the division is done by the number of edge dimensions. + # For a 3d cube, this convolves the perimeter in the following order: + # + # Number of Edge Dimensions | Convolved Region + # --------------------------+----------------- + # 1 | Faces of Cube + # 2 | Edges of Cube + # 3 | Corners of Cube + # + for n_edges in val_dims + unsafe_conv_kern_os_edge!( + # These arrays and buffers will be mutated + out, + tdbuff, + fdbuff, + perimeter_range, + # Number of edge dimensions to pad and convolve + n_edges, + # Data to be convolved + u, + filter_fd, + # FFTW plans + p, + ip, + # Sizes, ranges, and other pre-calculated constants + # + ## ranges listing center and edge blocks + edge_ranges, + center_block_ranges, + ## size and axis information + all_dims, # 1:N + su, + u_start, + sv, + nffts, + out_start, + out_stop, + save_blocksize, + sout_deficit, + tdbuff_axes) # How many output samples are missing if nffts > sout + end + + tdbuff_region = CartesianIndices(tdbuff) + # Portion of buffer with valid result of convolution + valid_buff_region = CartesianIndices(UnitRange.(sv, nffts)) + # Iterate over block indices (not data indices) that do not need to be padded + for block_pos in CartesianIndices(center_block_ranges) + # Calculate portion of data to transform + + block_idx = convert(NTuple{N,Int}, block_pos) + ## data_offset is NOT the beginning of the region that will be + ## convolved, but is instead the beginning of the unaliased data. + data_offset = save_blocksize .* (block_idx .- 1) + data_stop = data_offset .+ save_blocksize + data_region = CartesianIndices( + UnitRange.(u_start .+ data_offset .- sv .+ 1, u_start .+ data_stop .- 1) + ) + + # Convolve this portion of the data + + copyto!(tdbuff, tdbuff_region, u, data_region) + os_conv_block!(tdbuff, fdbuff, filter_fd, p, ip) + + # Save convolved result to output + + block_out_region = CartesianIndices( + UnitRange.(data_offset .+ out_start, data_stop .+ out_start .- 1) + ) + copyto!(out, block_out_region, tdbuff, valid_buff_region) + end + + out +end + +function _conv_kern_fft!(out::AbstractArray{T,N}, + output_indices, + u::AbstractArray{<:Real,N}, + v::AbstractArray{<:Real,N}) where {T<:Real,N} + outsize = size(output_indices) + nffts = nextfastfft(outsize) + padded = _zeropad!(similar(u, T, nffts), u) + p = plan_rfft(padded) + uf = p * padded + _zeropad!(padded, v) + vf = p * padded + uf .*= vf + raw_out = irfft(uf, nffts[1]) + copyto!(out, + output_indices, + raw_out, + CartesianIndices(outsize)) +end +function _conv_kern_fft!(out::AbstractArray{T}, output_indices, u, v) where {T} + outsize = size(output_indices) + nffts = nextfastfft(outsize) + upad = _zeropad!(similar(u, T, nffts), u) + vpad = _zeropad!(similar(v, T, nffts), v) + p! = plan_fft!(upad) + ip! = inv(p!) + p! * upad # Operates in place on upad + p! * vpad + upad .*= vpad + ip! * upad + copyto!(out, + output_indices, + upad, + CartesianIndices(outsize)) +end + +function _conv_td!(out, output_indices, u::AbstractArray{<:Number,N}, v::AbstractArray{<:Number,N}) where {N} + index_offset = first(CartesianIndices(u)) + first(CartesianIndices(v)) - first(output_indices) + checkbounds(out, output_indices) + fill!(out, zero(eltype(out))) + if size(u, 1) ≤ size(v, 1) # choose more efficient iteration order + for m in CartesianIndices(u), n in CartesianIndices(v) + @inbounds out[n+m - index_offset] = muladd(u[m], v[n], out[n+m - index_offset]) + end + else + for n in CartesianIndices(v), m in CartesianIndices(u) + @inbounds out[n+m - index_offset] = muladd(u[m], v[n], out[n+m - index_offset]) + end + end + return out +end + +# whether the given axis are to be considered to carry an offset for `conv!` and `conv` +conv_axis_with_offset(::Base.OneTo) = false +conv_axis_with_offset(a::Any) = throw(ArgumentError(lazy"unsupported axis type $(typeof(a))")) + +function conv_axes_with_offset(as::Tuple...) + with_offset = ((map(a -> map(conv_axis_with_offset, a), as)...)...,) + if !allequal(with_offset) + throw(ArgumentError("cannot mix offset and non-offset axes")) + end + return !isempty(with_offset) && first(with_offset) +end + +const FFTTypes = Union{Float32,Float64,ComplexF32,ComplexF64} + +""" + conv!(out, u, v; algorithm=:auto) + +Convolution of two arrays `u` and `v` with the result stored in `out`. `out` +must be large enough to store the entire result; if it is even larger, the +excess entries will be zeroed. + +`out`, `u`, and `v` can be N-dimensional arrays, with arbitrary indexing +offsets. If none of them has offset axes, +`size(out,d) ≥ size(u,d) + size(v,d) - 1` must hold. If both input and output +have offset axes, `firstindex(out,d) ≤ firstindex(u,d) + firstindex(v,d)` and +`lastindex(out,d) ≥ lastindex(u,d) + lastindex(v,d)` must hold (for d = 1,...,N). +A mix of offset and non-offset axes is not permitted. + +The `algorithm` keyword allows choosing the algorithm to use: +* `:direct`: Evaluates the convolution sum in time domain. +* `:fft_simple`: Evaluates the convolution as a product in the frequency domain. +* `:fft_overlapsave`: Evaluates the convolution block-wise as a product in the + frequency domain, overlapping the resulting blocks. +* `:fft`: Selects the faster of `:fft_simple` and `:fft_overlapsave` (as + estimated from the input size). +* `:fast`: Selects the fastest of `:direct`, `:fft_simple` and + `:fft_overlapsave` (as estimated from the input size). +* `:auto` (default): Equivalent to `:fast` if the data type is known to be + suitable for FFT-based computation, equivalent to `:direct` otherwise. + +!!! warning + The choices made by `:fft`, `:fast`, and `:auto` are based on performance + heuristics which may not result in the fastest algorithm in all cases. If + best performance for a certain size/type combination is required, it is + advised to do individual benchmarking and explicitly specify the desired + algorithm. +""" +function conv!( + out::AbstractArray{T,N}, + u::AbstractArray{<:Number,N}, + v::AbstractArray{<:Number,N}; + algorithm=:auto +) where {T<:Number,N} + offset = conv_axes_with_offset(axes(out), axes(u), axes(v)) ? 0 : 1 + output_indices = CartesianIndices(map(axes(out), axes(u), axes(v)) do ao, au, av + return (first(au)+first(av) : last(au)+last(av)) .- offset + end) + + if algorithm === :auto + algorithm = T <: FFTTypes ? :fast : :direct + end + if algorithm === :fast + if length(u) * length(v) < 2^16 # TODO: better heuristic + algorithm = :direct + else + algorithm = :fft + end + end + if algorithm === :direct + return _conv_td!(out, output_indices, u, v) + else + if output_indices != CartesianIndices(out) + fill!(out, zero(eltype(out))) + end + os_nffts = length(u) >= length(v) ? map(optimalfftfiltlength, size(v), size(u)) : map(optimalfftfiltlength, size(u), size(v)) + if algorithm === :fft + if any(os_nffts .< size(output_indices)) + algorithm = :fft_overlapsave + else + algorithm = :fft_simple + end + end + if algorithm === :fft_overlapsave + # v should be smaller than u for good performance + if length(u) >= length(v) + return unsafe_conv_kern_os!(out, output_indices, u, v, os_nffts) + else + return unsafe_conv_kern_os!(out, output_indices, v, u, os_nffts) + end + elseif algorithm === :fft_simple + return _conv_kern_fft!(out, output_indices, u, v) + else + throw(ArgumentError("algorithm must be :auto, :fast, :direct, :fft, :fft_simple, or :fft_overlapsave")) + end + end +end + +function conv_output_axes(au::Tuple, av::Tuple) + if conv_axes_with_offset(au, av) + return map((au, av) -> first(au)+first(av):last(au)+last(av), au, av) + else + return map((au, av) -> Base.OneTo(last(au) + last(av) - 1), au, av) + end +end + +""" + conv(u, v; algorithm) + +Convolution of two arrays. A convolution algorithm is automatically chosen among +direct convolution, FFT, or FFT overlap-save, depending on the size of the +input, unless explicitly specified with the `algorithm` keyword argument; see +[`conv!`](@ref) for details. +""" +function conv( + u::AbstractArray{Tu,N}, v::AbstractArray{Tv,N}; kwargs... +) where {Tu<:Number,Tv<:Number,N} + T = promote_type(Tu, Tv) + out_axes = conv_output_axes(axes(u), axes(v)) + out = similar(u, T, out_axes) + return conv!(out, u, v; kwargs...) +end + +function conv(A::AbstractArray{<:Number,M}, + B::AbstractArray{<:Number,N}; kwargs...) where {M,N} + if (M < N) + conv(cat(A, dims=N)::AbstractArray{eltype(A),N}, B; kwargs...) + else + @assert M > N + conv(A, cat(B, dims=M)::AbstractArray{eltype(B),M}; kwargs...) + end +end + +""" + conv(u,v,A) + +2-D convolution of the matrix `A` with the 2-D separable kernel generated by +the vector `u` and row-vector `v`. +Uses 2-D FFT algorithm. +""" +function conv(u::AbstractVector{T}, v::Transpose{T,<:AbstractVector}, A::AbstractMatrix{T}) where T + # Arbitrary indexing offsets not implemented + if any(conv_axis_with_offset, (axes(u)..., axes(v)..., axes(A)...)) + throw(ArgumentError("offset axes not supported")) + end + m = length(u) + size(A, 1) - 1 + n = length(v) + size(A, 2) - 1 + B = zeros(T, m, n) + B[CartesianIndices(A)] = A + u = fft(_zeropad(u, m)) + vt = fft(_zeropad(transpose(v), n)) + C = ifft(fft(B) .*= u .* transpose(vt)) + if T <: Real + return real(C) + end + return C +end + + +end \ No newline at end of file diff --git a/src/dspbase.jl b/src/dspbase.jl index b86ff43ed..e1edc32e6 100644 --- a/src/dspbase.jl +++ b/src/dspbase.jl @@ -1,5 +1,7 @@ # This file was formerly a part of Julia. License is MIT: https://julialang.org/license +using .Convolutions: _zeropad_keep_offset, conv + const SMALL_FILT_CUTOFF = 66 """ @@ -162,652 +164,6 @@ function deconv(b::StridedVector{T}, a::StridedVector{T}) where T filt(b, a, x) end - -""" - _zeropad!(padded::AbstractVector, - u::AbstractVector, - padded_axes = axes(padded), - data_dest::Tuple = (1,), - data_region = CartesianIndices(u)) - -Place the portion of `u` specified by `data_region` into `padded`, starting at -location `data_dest`. Sets the rest of `padded` to zero. This will mutate -`padded`. `padded_axes` must correspond to the axes of `padded`. - -""" -@inline function _zeropad!( - padded::AbstractVector, - u::AbstractVector, - padded_axes = axes(padded), - data_dest::Tuple = (first(padded_axes[1]),), - data_region = CartesianIndices(u), -) - datasize = length(data_region) - # Use axes to accommodate arrays that do not start at index 1 - data_first_i = first(data_region)[1] - dest_first_i = data_dest[1] - copyto!(padded, dest_first_i, u, data_first_i, datasize) - padded[first(padded_axes[1]):dest_first_i - 1] .= 0 - padded[dest_first_i + datasize : end] .= 0 - - padded -end - -@inline function _zeropad!( - padded::AbstractArray, - u::AbstractArray, - padded_axes = axes(padded), - data_dest::Tuple = first.(padded_axes), - data_region = CartesianIndices(u), -) - fill!(padded, zero(eltype(padded))) - dest_axes = UnitRange.(data_dest, data_dest .+ size(data_region) .- 1) - dest_region = CartesianIndices(dest_axes) - copyto!(padded, dest_region, u, data_region) - - padded -end - -""" - _zeropad(u, padded_size, [data_dest, data_region]) - -Creates and returns a new base-1 index array of size `padded_size`, with the -section of `u` specified by `data_region` copied into the region of the new - array as specified by `data_dest`. All other values will be initialized to - zero. - -If either `data_dest` or `data_region` is not specified, then the defaults -described in [`_zeropad!`](@ref) will be used. -""" -function _zeropad(u, padded_size, args...) - padded = similar(u, padded_size) - _zeropad!(padded, u, axes(padded), args...) -end - -function _zeropad_keep_offset(u, padded_size, u_axes, args...) - ax_starts = first.(u_axes) - new_axes = UnitRange.(ax_starts, ax_starts .+ padded_size .- 1) - _zeropad!(similar(u, new_axes), u, args...) -end - -function _zeropad_keep_offset( - u, padded_size, ::NTuple{<:Any, Base.OneTo{Int}}, args... -) - _zeropad(u, padded_size, args...) -end - -""" - _zeropad_keep_offset(u, padded_size, [data_dest, dat_region]) - -Like [`_zeropad`](@ref), but retains the first index of `u` when creating a new -array. -""" -function _zeropad_keep_offset(u, padded_size, args...) - _zeropad_keep_offset(u, padded_size, axes(u), args...) -end - -""" -Estimate the number of floating point multiplications per output sample for an -overlap-save algorithm with fft size `nfft`, and filter size `nb`. -""" -os_fft_complexity(nfft, nb) = (nfft * log2(nfft) + nfft) / (nfft - nb + 1) - -""" -Determine the length of FFT that minimizes the number of multiplications per -output sample for an overlap-save convolution of vectors of size `nb` and `nx`. -""" -function optimalfftfiltlength(nb, nx) - nfull = nb + nx - 1 - - # Step through possible nffts and find the nfft that minimizes complexity - # Assumes that complexity is convex - first_pow2 = ceil(Int, log2(nb)) - max_pow2 = ceil(Int, log2(nfull)) - prev_complexity = os_fft_complexity(2 ^ first_pow2, nb) - pow2 = first_pow2 + 1 - while pow2 <= max_pow2 - new_complexity = os_fft_complexity(2 ^ pow2, nb) - new_complexity > prev_complexity && break - prev_complexity = new_complexity - pow2 += 1 - end - nfft = pow2 > max_pow2 ? 2 ^ max_pow2 : 2 ^ (pow2 - 1) - - if nfft > nfull - # If nfft > nfull, then it's better to find next fast power - nfft = nextfastfft(nfull) - end - - nfft -end - -""" -Prepare buffers and FFTW plans for convolution. The two buffers, tdbuff and -fdbuff may be an alias of each other, because complex convolution only requires -one buffer. The plans are mutating where possible, and the inverse plan is -unnormalized. -""" -@inline function os_prepare_conv(u::AbstractArray{T, N}, - nffts) where {T<:Real, N} - tdbuff = similar(u, nffts) - bufsize = ntuple(i -> i == 1 ? nffts[i] >> 1 + 1 : nffts[i], N) - fdbuff = similar(u, Complex{T}, NTuple{N, Int}(bufsize)) - - p = plan_rfft(tdbuff) - ip = plan_brfft(fdbuff, nffts[1]) - - tdbuff, fdbuff, p, ip -end - -@inline function os_prepare_conv(u::AbstractArray{<:Complex}, nffts) - buff = similar(u, nffts) - - p = plan_fft!(buff) - ip = inv(p).p - - buff, buff, p, ip # Only one buffer for complex -end - -""" -Transform the smaller convolution input to frequency domain, and return it in a -new array. However, the contents of `buff` may be modified. -""" -@inline function os_filter_transform!(buff::AbstractArray{<:Real}, p) - p * buff -end - -@inline function os_filter_transform!(buff::AbstractArray{<:Complex}, p!) - copy(p! * buff) # p operates in place on buff -end - -""" -Take a block of data, and convolve it with the smaller convolution input. This -may modify the contents of `tdbuff` and `fdbuff`, and the result will be in -`tdbuff`. -""" -@inline function os_conv_block!(tdbuff::AbstractArray{<:Real}, - fdbuff::AbstractArray, - filter_fd, - p, - ip) - mul!(fdbuff, p, tdbuff) - fdbuff .*= filter_fd - mul!(tdbuff, ip, fdbuff) -end - -"Like the real version, but only operates on one buffer" -@inline function os_conv_block!(buff::AbstractArray{<:Complex}, - ::AbstractArray, # Only one buffer for complex - filter_fd, - p!, - ip!) - p! * buff # p! operates in place on buff - buff .*= filter_fd - ip! * buff # ip! operates in place on buff -end - -# Used by `unsafe_conv_kern_os!` to handle blocks of input data that need to be padded. -# -# For a given number of edge dimensions, convolve all regions along the -# perimeter that have that number of edge dimensions -# -# For a 3d cube, if n_edges = 1, this means all faces. If n_edges = 2, then -# all edges. Finally, if n_edges = 3, then all corners. -# -# This needs to be a separate function for subsets to generate tuple elements, -# which is only the case if the number of edges is a Val{n} type. Iterating over -# the number of edges with Val{n} is inherently type unstable, so this function -# boundary allows dispatch to make efficient code for each number of edge -# dimensions. -function unsafe_conv_kern_os_edge!( - # These arrays and buffers will be mutated - out::AbstractArray{<:Any, N}, - tdbuff, - fdbuff, - perimeter_range, - # Number of edge dimensions to pad and convolve - n_edges::Val, - # Data to be convolved - u, - filter_fd, - # FFTW plans - p, - ip, - # Sizes, ranges, and other pre-calculated constants - # - ## ranges listing center and edge blocks - edge_ranges, - center_block_ranges, - ## size and axis information - all_dims, # 1:N - su, - u_start, - sv, - nffts, - out_start, - out_stop, - save_blocksize, - sout_deficit, # How many output samples are missing if nffts > sout - tdbuff_axes, -) where N - # Iterate over all possible combinations of edge dimensions for a number of - # edges - # - # For a 3d cube with n_edges = 1, this will specify the top and bottom faces - # (edge_dims = (1,)), then the left and right faces (edge_dims = (2,)), then - # the front and back faces (edge_dims = (3,)) - for edge_dims in subsets(all_dims, n_edges) - # Specify a region on the perimeter by combining an edge block index for - # each dimension on an edge, and the central blocks for dimensions not - # on an edge. - # - # First make all entries equal to the center blocks: - copyto!(perimeter_range, 1, center_block_ranges, 1, N) - - # For the dimensions chosen to be on an edge (edge_dims), get the - # ranges of the blocks that would need to be padded (lie on an edge) - # in that dimension. - # - # There can be one to two such ranges for each dimension, because with - # some inputs sizes the whole convolution is just one range - # (one edge block), or the padding will be needed at both the leading - # and trailing side of that dimension - selected_edge_ranges = getindex.(Ref(edge_ranges), edge_dims) - - # Visit each combination of edge ranges for the edge dimensions chosen. - # For a 3d cube with n_edges = 1 and edge_dims = (1,), this will visit - # the top face, and then the bottom face. - for perimeter_edge_ranges in Iterators.ProductIterator(selected_edge_ranges) - # The center region for non-edge dimensions has been specified above, - # so finish specifying the region of the perimeter for this edge - # block - for (i, dim) in enumerate(edge_dims) - perimeter_range[dim] = perimeter_edge_ranges[i] - end - - # Region of block indices, not data indices! - block_region = CartesianIndices( - NTuple{N, UnitRange{Int}}(perimeter_range) - ) - for block_pos in block_region - # Figure out which portion of the input data should be transformed - - block_idx = convert(NTuple{N, Int}, block_pos) - ## data_offset is NOT the beginning of the region that will be - ## convolved, but is instead the beginning of the unaliased data. - data_offset = save_blocksize .* (block_idx .- 1) - ## How many zeros will need to be added before the data - pad_before = max.(0, sv .- data_offset .- 1) - data_ideal_stop = data_offset .+ save_blocksize - ## How many zeros will need to be added after the data - pad_after = max.(0, data_ideal_stop .- su) - - ## Data indices, not block indices - data_region = CartesianIndices( - UnitRange.( - u_start .+ data_offset .- sv .+ pad_before .+ 1, - u_start .+ data_ideal_stop .- pad_after .- 1 - ) - ) - - # Convolve portion of input - - _zeropad!(tdbuff, u, tdbuff_axes, pad_before .+ 1, data_region) - os_conv_block!(tdbuff, fdbuff, filter_fd, p, ip) - - # Save convolved result to output - - block_out_stop = min.( - out_start .+ data_offset .+ save_blocksize .- 1, - out_stop - ) - block_out_region = CartesianIndices( - UnitRange.(out_start .+ data_offset, block_out_stop) - ) - ## If the input could not fill tdbuff, account for that before - ## copying the convolution result to the output - u_deficit = max.(0, pad_after .- sv .+ 1) - valid_buff_region = CartesianIndices( - UnitRange.(sv, nffts .- u_deficit .- sout_deficit) - ) - copyto!(out, block_out_region, tdbuff, valid_buff_region) - end - end - end -end - -# Assumes u is larger than, or the same size as, v -# nfft should be greater than or equal to 2*sv-1 -function unsafe_conv_kern_os!(out, - output_indices, - u::AbstractArray{<:Any, N}, - v, - nffts) where N - sout = size(out) - su = size(u) - sv = size(v) - u_start = first.(axes(u)) - out_start = Tuple(first(output_indices)) - out_stop = Tuple(last(output_indices)) - ideal_save_blocksize = nffts .- sv .+ 1 - # Number of samples that are "missing" if the output is smaller than the - # valid portion of the convolution - sout_deficit = max.(0, ideal_save_blocksize .- sout) - # Size of the valid portion of the convolution result - save_blocksize = ideal_save_blocksize .- sout_deficit - nblocks = cld.(sout, save_blocksize) - - # Pre-allocation - tdbuff, fdbuff, p, ip = os_prepare_conv(out, nffts) - tdbuff_axes = axes(tdbuff) - - # Transform the smaller filter - _zeropad!(tdbuff, v) - filter_fd = os_filter_transform!(tdbuff, p) - filter_fd .*= 1 / prod(nffts) # Normalize once for brfft - - # block indices for center blocks, which need no padding - first_center_blocks = cld.(sv .- 1, save_blocksize) .+ 1 - last_center_blocks = fld.(su, save_blocksize) - center_block_ranges = UnitRange.(first_center_blocks, last_center_blocks) - - # block index ranges for blocks that need to be padded - # Corresponds to the leading and trailing side of a dimension, or if there - # are no center blocks, corresponds to the whole dimension - edge_ranges = map(nblocks, first_center_blocks, last_center_blocks) do nblock, firstfull, lastfull - lastfull > 1 ? [1:firstfull - 1, lastfull + 1 : nblock] : [1:nblock] - end - all_dims = 1:N - val_dims = ntuple(Val, Val(N)) - # Buffer to store ranges of indices for a single region of the perimeter - perimeter_range = Vector{UnitRange{Int}}(undef, N) - - # Convolve all blocks that require padding. - # - # This is accomplished by dividing the perimeter of the volume into - # subsections, where the division is done by the number of edge dimensions. - # For a 3d cube, this convolves the perimeter in the following order: - # - # Number of Edge Dimensions | Convolved Region - # --------------------------+----------------- - # 1 | Faces of Cube - # 2 | Edges of Cube - # 3 | Corners of Cube - # - for n_edges in val_dims - unsafe_conv_kern_os_edge!( - # These arrays and buffers will be mutated - out, - tdbuff, - fdbuff, - perimeter_range, - # Number of edge dimensions to pad and convolve - n_edges, - # Data to be convolved - u, - filter_fd, - # FFTW plans - p, - ip, - # Sizes, ranges, and other pre-calculated constants - # - ## ranges listing center and edge blocks - edge_ranges, - center_block_ranges, - ## size and axis information - all_dims, # 1:N - su, - u_start, - sv, - nffts, - out_start, - out_stop, - save_blocksize, - sout_deficit, - tdbuff_axes) # How many output samples are missing if nffts > sout - end - - tdbuff_region = CartesianIndices(tdbuff) - # Portion of buffer with valid result of convolution - valid_buff_region = CartesianIndices(UnitRange.(sv, nffts)) - # Iterate over block indices (not data indices) that do not need to be padded - for block_pos in CartesianIndices(center_block_ranges) - # Calculate portion of data to transform - - block_idx = convert(NTuple{N, Int}, block_pos) - ## data_offset is NOT the beginning of the region that will be - ## convolved, but is instead the beginning of the unaliased data. - data_offset = save_blocksize .* (block_idx .- 1) - data_stop = data_offset .+ save_blocksize - data_region = CartesianIndices( - UnitRange.(u_start .+ data_offset .- sv .+ 1, u_start .+ data_stop .- 1) - ) - - # Convolve this portion of the data - - copyto!(tdbuff, tdbuff_region, u, data_region) - os_conv_block!(tdbuff, fdbuff, filter_fd, p, ip) - - # Save convolved result to output - - block_out_region = CartesianIndices( - UnitRange.(data_offset .+ out_start, data_stop .+ out_start .- 1) - ) - copyto!(out, block_out_region, tdbuff, valid_buff_region) - end - - out -end - -function _conv_kern_fft!(out::AbstractArray{T, N}, - output_indices, - u::AbstractArray{<:Real, N}, - v::AbstractArray{<:Real, N}) where {T<:Real, N} - outsize = size(output_indices) - nffts = nextfastfft(outsize) - padded = _zeropad!(similar(u, T, nffts), u) - p = plan_rfft(padded) - uf = p * padded - _zeropad!(padded, v) - vf = p * padded - uf .*= vf - raw_out = irfft(uf, nffts[1]) - copyto!(out, - output_indices, - raw_out, - CartesianIndices(outsize)) -end -function _conv_kern_fft!(out::AbstractArray{T}, output_indices, u, v) where {T} - outsize = size(output_indices) - nffts = nextfastfft(outsize) - upad = _zeropad!(similar(u, T, nffts), u) - vpad = _zeropad!(similar(v, T, nffts), v) - p! = plan_fft!(upad) - ip! = inv(p!) - p! * upad # Operates in place on upad - p! * vpad - upad .*= vpad - ip! * upad - copyto!(out, - output_indices, - upad, - CartesianIndices(outsize)) -end - -function _conv_td!(out, output_indices, u::AbstractArray{<:Number, N}, v::AbstractArray{<:Number, N}) where {N} - index_offset = first(CartesianIndices(u)) + first(CartesianIndices(v)) - first(output_indices) - checkbounds(out, output_indices) - fill!(out, zero(eltype(out))) - if size(u, 1) ≤ size(v, 1) # choose more efficient iteration order - for m in CartesianIndices(u), n in CartesianIndices(v) - @inbounds out[n+m - index_offset] = muladd(u[m], v[n], out[n+m - index_offset]) - end - else - for n in CartesianIndices(v), m in CartesianIndices(u) - @inbounds out[n+m - index_offset] = muladd(u[m], v[n], out[n+m - index_offset]) - end - end - return out -end - -# whether the given axis are to be considered to carry an offset for `conv!` and `conv` -conv_axis_with_offset(::Base.OneTo) = false -conv_axis_with_offset(a::Any) = throw(ArgumentError(lazy"unsupported axis type $(typeof(a))")) - -function conv_axes_with_offset(as::Tuple...) - with_offset = ((map(a -> map(conv_axis_with_offset, a), as)...)...,) - if !allequal(with_offset) - throw(ArgumentError("cannot mix offset and non-offset axes")) - end - return !isempty(with_offset) && first(with_offset) -end - -const FFTTypes = Union{Float32, Float64, ComplexF32, ComplexF64} - -""" - conv!(out, u, v; algorithm=:auto) - -Convolution of two arrays `u` and `v` with the result stored in `out`. `out` -must be large enough to store the entire result; if it is even larger, the -excess entries will be zeroed. - -`out`, `u`, and `v` can be N-dimensional arrays, with arbitrary indexing -offsets. If none of them has offset axes, -`size(out,d) ≥ size(u,d) + size(v,d) - 1` must hold. If both input and output -have offset axes, `firstindex(out,d) ≤ firstindex(u,d) + firstindex(v,d)` and -`lastindex(out,d) ≥ lastindex(u,d) + lastindex(v,d)` must hold (for d = 1,...,N). -A mix of offset and non-offset axes is not permitted. - -The `algorithm` keyword allows choosing the algorithm to use: -* `:direct`: Evaluates the convolution sum in time domain. -* `:fft_simple`: Evaluates the convolution as a product in the frequency domain. -* `:fft_overlapsave`: Evaluates the convolution block-wise as a product in the - frequency domain, overlapping the resulting blocks. -* `:fft`: Selects the faster of `:fft_simple` and `:fft_overlapsave` (as - estimated from the input size). -* `:fast`: Selects the fastest of `:direct`, `:fft_simple` and - `:fft_overlapsave` (as estimated from the input size). -* `:auto` (default): Equivalent to `:fast` if the data type is known to be - suitable for FFT-based computation, equivalent to `:direct` otherwise. - -!!! warning - The choices made by `:fft`, `:fast`, and `:auto` are based on performance - heuristics which may not result in the fastest algorithm in all cases. If - best performance for a certain size/type combination is required, it is - advised to do individual benchmarking and explicitly specify the desired - algorithm. -""" -function conv!( - out::AbstractArray{T, N}, - u::AbstractArray{<:Number, N}, - v::AbstractArray{<:Number, N}; - algorithm=:auto -) where {T<:Number, N} - offset = conv_axes_with_offset(axes(out), axes(u), axes(v)) ? 0 : 1 - output_indices = CartesianIndices(map(axes(out), axes(u), axes(v)) do ao, au, av - return (first(au)+first(av) : last(au)+last(av)) .- offset - end) - - if algorithm===:auto - algorithm = T <: FFTTypes ? :fast : :direct - end - if algorithm===:fast - if length(u) * length(v) < 2^16 # TODO: better heuristic - algorithm = :direct - else - algorithm = :fft - end - end - if algorithm===:direct - return _conv_td!(out, output_indices, u, v) - else - if output_indices != CartesianIndices(out) - fill!(out, zero(eltype(out))) - end - os_nffts = length(u) >= length(v) ? map(optimalfftfiltlength, size(v), size(u)) : map(optimalfftfiltlength, size(u), size(v)) - if algorithm===:fft - if any(os_nffts .< size(output_indices)) - algorithm = :fft_overlapsave - else - algorithm = :fft_simple - end - end - if algorithm === :fft_overlapsave - # v should be smaller than u for good performance - if length(u) >= length(v) - return unsafe_conv_kern_os!(out, output_indices, u, v, os_nffts) - else - return unsafe_conv_kern_os!(out, output_indices, v, u, os_nffts) - end - elseif algorithm === :fft_simple - return _conv_kern_fft!(out, output_indices, u, v) - else - throw(ArgumentError("algorithm must be :auto, :fast, :direct, :fft, :fft_simple, or :fft_overlapsave")) - end - end -end - -function conv_output_axes(au::Tuple, av::Tuple) - if conv_axes_with_offset(au, av) - return map((au, av) -> first(au)+first(av):last(au)+last(av), au, av) - else - return map((au, av) -> Base.OneTo(last(au) + last(av) - 1), au, av) - end -end - -""" - conv(u, v; algorithm) - -Convolution of two arrays. A convolution algorithm is automatically chosen among -direct convolution, FFT, or FFT overlap-save, depending on the size of the -input, unless explicitly specified with the `algorithm` keyword argument; see -[`conv!`](@ref) for details. -""" -function conv( - u::AbstractArray{Tu, N}, v::AbstractArray{Tv, N}; kwargs... -) where {Tu<:Number, Tv<:Number, N} - T = promote_type(Tu, Tv) - out_axes = conv_output_axes(axes(u), axes(v)) - out = similar(u, T, out_axes) - return conv!(out, u, v; kwargs...) -end - -function conv(A::AbstractArray{<:Number, M}, - B::AbstractArray{<:Number, N}; kwargs...) where {M, N} - if (M < N) - conv(cat(A, dims=N)::AbstractArray{eltype(A), N}, B; kwargs...) - else - @assert M > N - conv(A, cat(B, dims=M)::AbstractArray{eltype(B), M}; kwargs...) - end -end - -""" - conv(u,v,A) - -2-D convolution of the matrix `A` with the 2-D separable kernel generated by -the vector `u` and row-vector `v`. -Uses 2-D FFT algorithm. -""" -function conv(u::AbstractVector{T}, v::Transpose{T,<:AbstractVector}, A::AbstractMatrix{T}) where T - # Arbitrary indexing offsets not implemented - if any(conv_axis_with_offset, (axes(u)..., axes(v)..., axes(A)...)) - throw(ArgumentError("offset axes not supported")) - end - m = length(u) + size(A, 1) - 1 - n = length(v) + size(A, 2) - 1 - B = zeros(T, m, n) - B[CartesianIndices(A)] = A - u = fft(_zeropad(u, m)) - vt = fft(_zeropad(transpose(v), n)) - C = ifft(fft(B) .*= u .* transpose(vt)) - if T <: Real - return real(C) - end - return C -end - - dsp_reverse(v::AbstractVector, ::Tuple{Base.OneTo}) = reverse(v) function dsp_reverse(v::AbstractVector, vaxes::Tuple{AbstractUnitRange}) vsize = length(v) diff --git a/src/util.jl b/src/util.jl index ca0395d01..520325e24 100644 --- a/src/util.jl +++ b/src/util.jl @@ -4,6 +4,7 @@ import Base: * using LinearAlgebra: mul!, BLAS using FFTW using Statistics: mean +using ..Convolutions: nextfastfft export hilbert, fftintype, @@ -103,37 +104,6 @@ fftabs2type(::Type{Complex{T}}) where {T<:FFTW.fftwReal} = T fftabs2type(::Type{T}) where {T<:FFTW.fftwReal} = T fftabs2type(::Type{T}) where {T<:Union{Real,Complex}} = Float64 -# Get next fast FFT size for a given signal length -const FAST_FFT_SIZES = (2, 3, 5, 7) - -""" - nextfastfft(n::Integer) - nextfastfft(ns::Tuple) - -Return an estimate for the padded size of an array that -is optimal for the performance of an n-dimensional FFT. - -For `Tuple` inputs, this returns a `Tuple` that is the size -of the padded array. - -For `Integer` inputs, this returns the closest product of 2, 3, 5, and 7 -greater than or equal to `n`. -FFTW contains optimized kernels for these sizes and computes -Fourier transforms of inputs that are a product of these -sizes faster than for inputs of other sizes. - -!!! warning - - Currently, `nextfastfft(ns::Tuple)` is implemented as - `nextfastfft.(ns)`. This may change in future releases if - a better estimation model is found. - - It is recommended to use `nextfastfft.(ns)` to obtain - a tuple of padded lengths for 1D FFTs. -""" -nextfastfft(n::Integer) = nextprod(FAST_FFT_SIZES, n) -nextfastfft(ns::Tuple{Vararg{Integer}}) = nextfastfft.(ns) - ## COMMON DSP TOOLS diff --git a/test/dsp.jl b/test/dsp.jl index 897023a15..a34c7a251 100644 --- a/test/dsp.jl +++ b/test/dsp.jl @@ -2,9 +2,8 @@ # TODO: parameterize conv tests using Test, OffsetArrays using DSP: filt, filt!, deconv, conv, xcorr, - optimalfftfiltlength, unsafe_conv_kern_os!, _conv_kern_fft! nextfastfft - +using DSP.Convolutions: optimalfftfiltlength, unsafe_conv_kern_os!, _conv_kern_fft! @testset "filt" begin From 9f4bc12df9b31f6e46d8f82f99ae1e9e4e54408d Mon Sep 17 00:00:00 2001 From: wheeheee <104880306+wheeheee@users.noreply.github.com> Date: Sun, 15 Dec 2024 18:02:27 +0800 Subject: [PATCH 2/6] move xcorr to Convolutions.jl --- src/DSP.jl | 2 +- src/Filters/response.jl | 2 +- src/convolutions.jl | 75 +++++++++++++++++++++++++++++++++++++++- src/dspbase.jl | 76 ----------------------------------------- src/lpc.jl | 2 +- src/util.jl | 3 +- test/dsp.jl | 6 ++-- 7 files changed, 81 insertions(+), 85 deletions(-) diff --git a/src/DSP.jl b/src/DSP.jl index 86cd90624..468c3ec3e 100644 --- a/src/DSP.jl +++ b/src/DSP.jl @@ -2,7 +2,7 @@ module DSP using LinearAlgebra: mul! -export deconv, filt, filt!, xcorr +export deconv, filt, filt! # This function has methods added in `periodograms` but is not exported, # so we define it here so one can do `DSP.allocate_output` instead of diff --git a/src/Filters/response.jl b/src/Filters/response.jl index 5d814ddef..add3f6bcd 100644 --- a/src/Filters/response.jl +++ b/src/Filters/response.jl @@ -4,7 +4,7 @@ # Frequency response of a digital filter # -using ..DSP: xcorr +using ..Convolutions: xcorr """ H, w = freqresp(filter::FilterCoefficients) diff --git a/src/convolutions.jl b/src/convolutions.jl index cfd12ddf4..fbf3f3b59 100644 --- a/src/convolutions.jl +++ b/src/convolutions.jl @@ -4,7 +4,7 @@ using LinearAlgebra: Transpose, mul! using IterTools: subsets using FFTW -export conv, conv!, nextfastfft +export conv, conv!, xcorr, nextfastfft # Get next fast FFT size for a given signal length const FAST_FFT_SIZES = (2, 3, 5, 7) @@ -681,5 +681,78 @@ function conv(u::AbstractVector{T}, v::Transpose{T,<:AbstractVector}, A::Abstrac return C end +dsp_reverse(v::AbstractVector, ::Tuple{Base.OneTo}) = reverse(v) +function dsp_reverse(v::AbstractVector, vaxes::Tuple{AbstractUnitRange}) + vsize = length(v) + reflected_start = - first(only(vaxes)) - vsize + 1 + reflected_axes = (reflected_start : reflected_start + vsize - 1,) + out = similar(v, reflected_axes) + copyto!(out, reflected_start, Iterators.reverse(v), 1, vsize) +end + + +""" + xcorr(u; padmode::Symbol=:none, scaling::Symbol=:none) + xcorr(u, v; padmode::Symbol=:none, scaling::Symbol=:none) + +With two arguments, compute the cross-correlation of two vectors, by calculating +the similarity between `u` and `v` with various offsets of `v`. Delaying `u` +relative to `v` will shift the result to the right. If one argument is provided, +calculate `xcorr(u, u; kwargs...)`. + +The size of the output depends on the `padmode` keyword argument: with `padmode = +:none` the length of the result will be `length(u) + length(v) - 1`, as with `conv`. +With `padmode = :longest`, the shorter of the arguments will be padded so they +are of equal length. This gives a result with length `2*max(length(u), length(v))-1`, +with the zero-lag condition at the center. + +The keyword argument `scaling` can be provided. Possible arguments are the default +`:none` and `:biased`. `:biased` is valid only if the vectors have the same length, +or only one vector is provided, dividing the result by `length(u)`. + +# Examples + +```jldoctest +julia> xcorr([1,2,3],[1,2,3]) +5-element Vector{Int64}: + 3 + 8 + 14 + 8 + 3 +``` +""" +function xcorr( + u::AbstractVector, v::AbstractVector; padmode::Symbol=:none, scaling::Symbol=:none +) + su = size(u, 1); sv = size(v, 1) + + if scaling == :biased && su != sv + throw(DimensionMismatch("scaling only valid for vectors of same length")) + end + + if padmode == :longest + if su < sv + u = _zeropad_keep_offset(u, sv) + elseif sv < su + v = _zeropad_keep_offset(v, su) + end + elseif padmode != :none + throw(ArgumentError("padmode keyword argument must be either :none or :longest")) + end + + res = conv(u, dsp_reverse(conj(v), axes(v))) + if scaling == :biased + res = _normalize!(res, su) + end + + return res +end + +_normalize!(x::AbstractArray{<:Integer}, sz::Int) = (x ./ sz) # does not mutate x +_normalize!(x::AbstractArray, sz::Int) = (x ./= sz) + +# TODO: write specialized (r/)fft-ed autocorrelation functions +xcorr(u::AbstractVector; kwargs...) = xcorr(u, u; kwargs...) end \ No newline at end of file diff --git a/src/dspbase.jl b/src/dspbase.jl index e1edc32e6..d93e328ed 100644 --- a/src/dspbase.jl +++ b/src/dspbase.jl @@ -1,7 +1,5 @@ # This file was formerly a part of Julia. License is MIT: https://julialang.org/license -using .Convolutions: _zeropad_keep_offset, conv - const SMALL_FILT_CUTOFF = 66 """ @@ -163,77 +161,3 @@ function deconv(b::StridedVector{T}, a::StridedVector{T}) where T x[1] = 1 filt(b, a, x) end - -dsp_reverse(v::AbstractVector, ::Tuple{Base.OneTo}) = reverse(v) -function dsp_reverse(v::AbstractVector, vaxes::Tuple{AbstractUnitRange}) - vsize = length(v) - reflected_start = - first(only(vaxes)) - vsize + 1 - reflected_axes = (reflected_start : reflected_start + vsize - 1,) - out = similar(v, reflected_axes) - copyto!(out, reflected_start, Iterators.reverse(v), 1, vsize) -end - - -""" - xcorr(u; padmode::Symbol=:none, scaling::Symbol=:none) - xcorr(u, v; padmode::Symbol=:none, scaling::Symbol=:none) - -With two arguments, compute the cross-correlation of two vectors, by calculating -the similarity between `u` and `v` with various offsets of `v`. Delaying `u` -relative to `v` will shift the result to the right. If one argument is provided, -calculate `xcorr(u, u; kwargs...)`. - -The size of the output depends on the `padmode` keyword argument: with `padmode = -:none` the length of the result will be `length(u) + length(v) - 1`, as with `conv`. -With `padmode = :longest`, the shorter of the arguments will be padded so they -are of equal length. This gives a result with length `2*max(length(u), length(v))-1`, -with the zero-lag condition at the center. - -The keyword argument `scaling` can be provided. Possible arguments are the default -`:none` and `:biased`. `:biased` is valid only if the vectors have the same length, -or only one vector is provided, dividing the result by `length(u)`. - -# Examples - -```jldoctest -julia> xcorr([1,2,3],[1,2,3]) -5-element Vector{Int64}: - 3 - 8 - 14 - 8 - 3 -``` -""" -function xcorr( - u::AbstractVector, v::AbstractVector; padmode::Symbol=:none, scaling::Symbol=:none -) - su = size(u, 1); sv = size(v, 1) - - if scaling == :biased && su != sv - throw(DimensionMismatch("scaling only valid for vectors of same length")) - end - - if padmode == :longest - if su < sv - u = _zeropad_keep_offset(u, sv) - elseif sv < su - v = _zeropad_keep_offset(v, su) - end - elseif padmode != :none - throw(ArgumentError("padmode keyword argument must be either :none or :longest")) - end - - res = conv(u, dsp_reverse(conj(v), axes(v))) - if scaling == :biased - res = _normalize!(res, su) - end - - return res -end - -_normalize!(x::AbstractArray{<:Integer}, sz::Int) = (x ./ sz) # does not mutate x -_normalize!(x::AbstractArray, sz::Int) = (x ./= sz) - -# TODO: write specialized (r/)fft-ed autocorrelation functions -xcorr(u::AbstractVector; kwargs...) = xcorr(u, u; kwargs...) diff --git a/src/lpc.jl b/src/lpc.jl index e9150fc76..258e06945 100644 --- a/src/lpc.jl +++ b/src/lpc.jl @@ -1,7 +1,7 @@ module LPC -using ..DSP: xcorr +using ..Convolutions: xcorr using LinearAlgebra: dot, BlasComplex, BLAS diff --git a/src/util.jl b/src/util.jl index 520325e24..135dddd58 100644 --- a/src/util.jl +++ b/src/util.jl @@ -1,10 +1,9 @@ module Util -using ..DSP: xcorr import Base: * using LinearAlgebra: mul!, BLAS using FFTW using Statistics: mean -using ..Convolutions: nextfastfft +using ..Convolutions: nextfastfft, xcorr export hilbert, fftintype, diff --git a/test/dsp.jl b/test/dsp.jl index a34c7a251..086470edd 100644 --- a/test/dsp.jl +++ b/test/dsp.jl @@ -1,9 +1,9 @@ # This file was formerly a part of Julia. License is MIT: https://julialang.org/license # TODO: parameterize conv tests using Test, OffsetArrays -using DSP: filt, filt!, deconv, conv, xcorr, - nextfastfft -using DSP.Convolutions: optimalfftfiltlength, unsafe_conv_kern_os!, _conv_kern_fft! +using DSP: filt, filt!, deconv +using DSP.Convolutions: conv, xcorr, nextfastfft, + optimalfftfiltlength, unsafe_conv_kern_os!, _conv_kern_fft! @testset "filt" begin From 22809b92f354f0d1f5ce2940b0f8dadc086370d0 Mon Sep 17 00:00:00 2001 From: wheeheee <104880306+wheeheee@users.noreply.github.com> Date: Sun, 15 Dec 2024 00:56:15 +0800 Subject: [PATCH 3/6] import conv, nextfastfft in deprecations.jl --- src/deprecated.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/deprecated.jl b/src/deprecated.jl index 3675be8d2..c30828145 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -1,7 +1,8 @@ # deprecations in 0.8 -import .Util.nextfastfft +import .Convolutions.nextfastfft @deprecate nextfastfft(ns...) nextfastfft.(ns) false +import .Convolutions.conv @deprecate (conv(u::AbstractVector{T}, v::AbstractVector{T}, A::AbstractMatrix{T}) where T) conv(u, transpose(v), A) @deprecate( From 1e73e951679d06d2a513b89d96d60cd07f87b119 Mon Sep 17 00:00:00 2001 From: wheeheee <104880306+wheeheee@users.noreply.github.com> Date: Sun, 15 Dec 2024 17:32:38 +0800 Subject: [PATCH 4/6] import DSP.Convolutions in OffsetArraysExt.jl --- ext/OffsetArraysExt.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/OffsetArraysExt.jl b/ext/OffsetArraysExt.jl index 3b6a981eb..a8210c83b 100644 --- a/ext/OffsetArraysExt.jl +++ b/ext/OffsetArraysExt.jl @@ -1,7 +1,7 @@ module OffsetArraysExt -import DSP +import DSP.Convolutions import OffsetArrays -DSP.Convolutions.conv_axis_with_offset(::OffsetArrays.IdOffsetRange) = true +Convolutions.conv_axis_with_offset(::OffsetArrays.IdOffsetRange) = true end From a0d8d41ba772c155630dda0d79a1379ef443f1a7 Mon Sep 17 00:00:00 2001 From: wheeheee <104880306+wheeheee@users.noreply.github.com> Date: Sun, 15 Dec 2024 18:29:36 +0800 Subject: [PATCH 5/6] Fix internals.md --- docs/src/internals.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/src/internals.md b/docs/src/internals.md index 8ea63f24a..32b40f301 100644 --- a/docs/src/internals.md +++ b/docs/src/internals.md @@ -2,19 +2,19 @@ Functions here are not exported. ```@docs -DSP.os_fft_complexity -DSP.os_prepare_conv -DSP.os_conv_block! -DSP.os_filter_transform! +DSP.Convolutions.os_fft_complexity +DSP.Convolutions.optimalfftfiltlength +DSP.Convolutions.os_prepare_conv +DSP.Convolutions.os_conv_block! +DSP.Convolutions.os_filter_transform! DSP.Windows.makewindow DSP.Windows.padplot -DSP._zeropad -DSP._zeropad! -DSP._zeropad_keep_offset +DSP.Convolutions._zeropad +DSP.Convolutions._zeropad! +DSP.Convolutions._zeropad_keep_offset DSP.Filters._polyprep DSP.Filters.freq_eval DSP.Filters.build_grid DSP.Filters.lagrange_interp DSP.Periodograms.coherence_from_cs! -DSP.optimalfftfiltlength ``` \ No newline at end of file From a17ad59fedd0ff506448890da0314ac3a671d92f Mon Sep 17 00:00:00 2001 From: wheeheee <104880306+wheeheee@users.noreply.github.com> Date: Sun, 22 Dec 2024 12:49:46 +0800 Subject: [PATCH 6/6] use FFTW.fftwNumber as we're already using it in util / others use it too. --- src/convolutions.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/convolutions.jl b/src/convolutions.jl index fbf3f3b59..ce9860ef0 100644 --- a/src/convolutions.jl +++ b/src/convolutions.jl @@ -536,7 +536,7 @@ function conv_axes_with_offset(as::Tuple...) return !isempty(with_offset) && first(with_offset) end -const FFTTypes = Union{Float32,Float64,ComplexF32,ComplexF64} +const FFTTypes = FFTW.fftwNumber """ conv!(out, u, v; algorithm=:auto)