Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use fully parallel atomic initialization #86

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/PointNeighbors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ include("gpu.jl")
export foreach_point_neighbor, foreach_neighbor
export TrivialNeighborhoodSearch, GridNeighborhoodSearch, PrecomputedNeighborhoodSearch
export DictionaryCellList, FullGridCellList
export ParallelUpdate, SemiParallelUpdate, SerialUpdate
export ParallelUpdate, SemiParallelUpdate, SerialUpdate, ParallelIncrementalUpdate
export initialize!, update!, initialize_grid!, update_grid!
export PolyesterBackend, ThreadsDynamicBackend, ThreadsStaticBackend
export PeriodicBox, copy_neighborhood_search
Expand Down
2 changes: 1 addition & 1 deletion src/cell_lists/full_grid.jl
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ struct FullGridCellList{C, LI, MINC, MAXC} <: AbstractCellList
end

function supported_update_strategies(::FullGridCellList{<:DynamicVectorOfVectors})
return (ParallelUpdate, SemiParallelUpdate, SerialUpdate)
return (ParallelUpdate, SemiParallelUpdate, ParallelIncrementalUpdate, SerialUpdate)
end

supported_update_strategies(::FullGridCellList) = (SemiParallelUpdate, SerialUpdate)
Expand Down
60 changes: 56 additions & 4 deletions src/nhs_grid.jl
Original file line number Diff line number Diff line change
Expand Up @@ -106,14 +106,25 @@ end
"""
ParallelUpdate()

Fully parallel update by using atomic operations to avoid race conditions when adding points
into the same cell.
Fully parallel initialization and update by using atomic operations to avoid race conditions
when adding points into the same cell.
This is not available for all cell list implementations, but is the default when available.

See [`GridNeighborhoodSearch`](@ref) for usage information.
"""
struct ParallelUpdate end

"""
ParallelIncrementalUpdate()

Like [`ParallelUpdate`](@ref), but only updates the cells that have changed.
This is generally slower than a full reinitialization with [`ParallelUpdate`](@ref),
but is included for benchmarking purposes.

See [`GridNeighborhoodSearch`](@ref) for usage information.
"""
struct ParallelIncrementalUpdate end

"""
SemiParallelUpdate()

Expand All @@ -138,7 +149,10 @@ See [`GridNeighborhoodSearch`](@ref) for usage information.
struct SerialUpdate end

# No update buffer needed for fully parallel update
@inline create_update_buffer(::ParallelUpdate, _, _) = nothing
@inline function create_update_buffer(::Union{ParallelUpdate, ParallelIncrementalUpdate},
_, _)
return nothing
end

@inline function create_update_buffer(::SemiParallelUpdate, cell_list, n_points)
# Create update buffer and initialize it with empty vectors
Expand Down Expand Up @@ -185,6 +199,35 @@ function initialize_grid!(neighborhood_search::GridNeighborhoodSearch, y::Abstra
return neighborhood_search
end

# WARNING! Undocumented, experimental feature:
# By default, determine the parallelization backend from the type of `y`.
# Optionally, pass a `KernelAbstractions.Backend` to run the KernelAbstractions.jl code
# on this backend. This can be useful to run GPU kernels on the CPU by passing
# `parallelization_backend = KernelAbstractions.CPU()`, even though `y isa Array`.
function initialize_grid!(neighborhood_search::GridNeighborhoodSearch{<:Any, ParallelUpdate},
y::AbstractMatrix; parallelization_backend = y)
(; cell_list) = neighborhood_search

empty!(cell_list)

if neighborhood_search.search_radius < eps()
# Cannot initialize with zero search radius.
# This is used in TrixiParticles when a neighborhood search is not used.
return neighborhood_search
end

@threaded parallelization_backend for point in axes(y, 2)
# Get cell index of the point's cell
point_coords = extract_svector(y, Val(ndims(neighborhood_search)), point)
cell = cell_coords(point_coords, neighborhood_search)

# Add point to corresponding cell
push_cell_atomic!(cell_list, cell, point)
end

return neighborhood_search
end

# WARNING! Undocumented, experimental feature:
# By default, determine the parallelization backend from the type of `x`.
# Optionally, pass a `KernelAbstractions.Backend` to run the KernelAbstractions.jl code
Expand Down Expand Up @@ -300,7 +343,8 @@ end

# Fully parallel update with atomic push.
# See the warning above. `parallelization_backend = nothing` will use `Polyester.@batch`.
function update_grid!(neighborhood_search::GridNeighborhoodSearch{<:Any, ParallelUpdate},
function update_grid!(neighborhood_search::Union{GridNeighborhoodSearch{<:Any, ParallelUpdate},
GridNeighborhoodSearch{<:Any, ParallelIncrementalUpdate}},
coords_fun::Function; parallelization_backend = nothing)
(; cell_list) = neighborhood_search

Expand Down Expand Up @@ -343,6 +387,14 @@ function update_grid!(neighborhood_search::GridNeighborhoodSearch{<:Any, Paralle
return neighborhood_search
end

# Note that this is only defined when a matrix `y` is passed. When updating with a function,
# it will fall back to the semi-parallel update.
function update_grid!(neighborhood_search::GridNeighborhoodSearch{<:Any, ParallelUpdate},
y::AbstractMatrix; parallelization_backend = y)
# The parallel (atomic) initialization is usually faster than the incremental update
initialize_grid!(neighborhood_search, y; parallelization_backend)
end

@propagate_inbounds function foreach_neighbor(f, system_coords, neighbor_system_coords,
neighborhood_search::GridNeighborhoodSearch,
point;
Expand Down
Loading