diff --git a/examples/Manifest.toml b/examples/Manifest.toml
index aeecc76..a650675 100644
--- a/examples/Manifest.toml
+++ b/examples/Manifest.toml
@@ -342,9 +342,9 @@ version = "1.14.1"
 
 [[deps.JuliaInterpreter]]
 deps = ["CodeTracking", "InteractiveUtils", "Random", "UUIDs"]
-git-tree-sha1 = "2984284a8abcfcc4784d95a9e2ea4e352dd8ede7"
+git-tree-sha1 = "fc8504eca188aaae4345649ca6105806bc584b70"
 uuid = "aa1ae85d-cabe-5617-a682-6adf51b2e16a"
-version = "0.9.36"
+version = "0.9.37"
 
 [[deps.Krylov]]
 deps = ["LinearAlgebra", "Printf", "SparseArrays"]
@@ -459,7 +459,7 @@ version = "5.1.2+0"
 
 [[deps.MPI]]
 deps = ["Distributed", "DocStringExtensions", "Libdl", "MPICH_jll", "MPIPreferences", "MPItrampoline_jll", "MicrosoftMPI_jll", "OpenMPI_jll", "PkgVersion", "PrecompileTools", "Requires", "Serialization", "Sockets"]
-git-tree-sha1 = "8faa547a424cbd7eca2529c6ddf9929c4ec64e71"
+git-tree-sha1 = "71c417a539693107d1b0b0d413cc58e3f743c937"
 repo-rev = "master"
 repo-url = "https://github.com/PetrKryslUCSD/MPI.jl.git"
 uuid = "da04e1cc-30fd-572f-bb4f-1f8673147195"
@@ -652,9 +652,9 @@ version = "1.3.0"
 
 [[deps.Revise]]
 deps = ["CodeTracking", "Distributed", "FileWatching", "JuliaInterpreter", "LibGit2", "LoweredCodeUtils", "OrderedCollections", "REPL", "Requires", "UUIDs", "Unicode"]
-git-tree-sha1 = "7f4228017b83c66bd6aa4fddeb170ce487e53bc7"
+git-tree-sha1 = "834aedb1369919a7b2026d7e04c2d49a311d26f4"
 uuid = "295af30f-e4ad-537b-8983-00126c2a3abe"
-version = "3.6.2"
+version = "3.6.3"
 
 [[deps.SHA]]
 uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
@@ -837,9 +837,9 @@ version = "1.21.1"
 
 [[deps.XML2_jll]]
 deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Zlib_jll"]
-git-tree-sha1 = "6a451c6f33a176150f315726eba8b92fbfdb9ae7"
+git-tree-sha1 = "a2fccc6559132927d4c5dc183e3e01048c6dcbd6"
 uuid = "02c8fc9c-b97f-50b9-bbe4-9be30ff0a78a"
-version = "2.13.4+0"
+version = "2.13.5+0"
 
 [[deps.Zlib_jll]]
 deps = ["Libdl"]
diff --git a/examples/conc/pace.jl b/examples/conc/pace.jl
index b6e63a6..39ffbd2 100644
--- a/examples/conc/pace.jl
+++ b/examples/conc/pace.jl
@@ -7,11 +7,6 @@
 run(cmd)
 @info "======================================\nExpected 26 iterations"
 
-cmd = `julia --project=. ./conc/shells/barrel_seq_driver.jl`
-run(cmd)
-@info "======================================\nExpected 69 iterations"
-
-
 cmd = `julia --project=. ./conc/shells/hyp_seq_driver.jl`
 run(cmd)
 @info "======================================\nExpected 28 iterations"
@@ -21,3 +16,8 @@ cmd = `julia --project=. ./conc/lindef/fib_seq_driver.jl --Np 7`
 run(cmd)
 @info "======================================\nExpected 67 iterations"
 
+
+cmd = `julia --project=. ./conc/shells/barrel_seq_driver.jl`
+run(cmd)
+@info "======================================\nExpected 69 iterations"
+
diff --git a/examples/conc/pace_mpi.jl b/examples/conc/pace_mpi.jl
index d48bae6..3698e6c 100644
--- a/examples/conc/pace_mpi.jl
+++ b/examples/conc/pace_mpi.jl
@@ -9,9 +9,7 @@
 run(cmd)
 @info "======================================\nExpected 26 iterations"
 
-cmd = `$(mpiexecjl) -n 7 julia --project=. ./conc/shells/barrel_mpi_driver.jl`
-run(cmd)
-@info "======================================\nExpected 69 iterations"
+
 
 
 cmd = `$(mpiexecjl) -n 7 julia --project=. ./conc/shells/hyp_mpi_driver.jl`
@@ -23,3 +21,6 @@ cmd = `$(mpiexecjl) -n 7 julia --project=. ./conc/lindef/fib_mpi_driver.jl`
 run(cmd)
 @info "======================================\nExpected 67 iterations"
 
+cmd = `$(mpiexecjl) -n 7 julia --project=. ./conc/shells/barrel_mpi_driver.jl`
+run(cmd)
+@info "======================================\nExpected 69 iterations"
\ No newline at end of file
diff --git a/examples/mpi_experiments/mpi_experiment_4.jl b/examples/mpi_experiments/mpi_experiment_4.jl
new file mode 100644
index 0000000..df7f834
--- /dev/null
+++ b/examples/mpi_experiments/mpi_experiment_4.jl
@@ -0,0 +1,20 @@
+# Adapted from the MPI.jl example examples/03-reduce.jl.
+# This example performs an elementwise sum reduction of an array
+# across all ranks with the blocking collective MPI.Allreduce.
+
+using MPI, Statistics
+
+MPI.Init()
+const comm = MPI.COMM_WORLD
+const root = 0
+
+rank = MPI.Comm_rank(comm)
+
+X = fill(rank, 7)
+
+# Perform a sum reduction
+X = MPI.Allreduce(X, MPI.SUM, comm)
+
+if MPI.Comm_rank(comm) == root
+    println("The sum of the arrays is: ", X)
+end
\ No newline at end of file
diff --git a/examples/mpi_experiments/mpi_experiment_5.jl b/examples/mpi_experiments/mpi_experiment_5.jl
new file mode 100644
index 0000000..923215e
--- /dev/null
+++ b/examples/mpi_experiments/mpi_experiment_5.jl
@@ -0,0 +1,70 @@
+# Adapted from the MPI.jl example examples/03-reduce.jl.
+# This example defines a nonblocking Iallreduce! wrapper over the low-level
+# MPI C API and uses it to perform an elementwise sum reduction across ranks.
+
+using MPI, Statistics
+
+RBuffer = MPI.RBuffer
+Op = MPI.Op
+API = MPI.API
+MPI_Op = MPI.MPI_Op
+IN_PLACE = MPI.IN_PLACE
+Comm = MPI.Comm
+AbstractRequest = MPI.AbstractRequest
+Request = MPI.Request
+_doc_external = x -> "For more information, see the [MPI documentation]($x)."
+
+## Iallreduce
+
+# mutating
+"""
+    Iallreduce!(sendbuf, recvbuf, op, comm::Comm, req::AbstractRequest=Request())
+    Iallreduce!(sendrecvbuf, op, comm::Comm, req::AbstractRequest=Request())
+
+Starts a nonblocking elementwise reduction using the operator `op` on the buffer `sendbuf`,
+storing the result in the `recvbuf` of all processes in the group. Returns the request `req`.
+
+If only one `sendrecvbuf` buffer is provided, then the operation is performed in-place.
+
+# See also
+- [`Iallreduce`](@ref), to handle allocation of the output buffer.
+- [`Op`](@ref) for details on reduction operators.
+
+# External links
+$(_doc_external("MPI_Iallreduce"))
+"""
+function Iallreduce!(rbuf::RBuffer, op::Union{Op,MPI_Op}, comm::Comm, req::AbstractRequest=Request())
+    # int MPI_Iallreduce(const void *sendbuf, void *recvbuf, int count,
+    #                    MPI_Datatype datatype, MPI_Op op, MPI_Comm comm,
+    #                    MPI_Request *request)
+    API.MPI_Iallreduce(rbuf.senddata, rbuf.recvdata, rbuf.count, rbuf.datatype, op, comm, req)
+    return req
+end
+Iallreduce!(rbuf::RBuffer, op, comm::Comm, req::AbstractRequest=Request()) =
+    Iallreduce!(rbuf, Op(op, eltype(rbuf)), comm, req)
+Iallreduce!(sendbuf, recvbuf, op, comm::Comm, req::AbstractRequest=Request()) =
+    Iallreduce!(RBuffer(sendbuf, recvbuf), op, comm, req)
+
+# inplace
+Iallreduce!(buf, op, comm::Comm, req::AbstractRequest=Request()) = Iallreduce!(IN_PLACE, buf, op, comm, req)
+
+
+MPI.Init()
+const comm = MPI.COMM_WORLD
+const root = 0
+
+rank = MPI.Comm_rank(comm)
+
+X = fill(rank, 7)
+
+# Perform a sum reduction
+req = Iallreduce!(X, MPI.SUM, comm)
+sleep(rand())
+MPI.Wait(req)
+
+if MPI.Comm_rank(comm) == root
+    println("Rank $(MPI.Comm_rank(comm)): The array is: ", X)
+    println("Rank $(MPI.Comm_rank(comm)): Should have gotten: ", sum(0:MPI.Comm_size(comm)-1))
+end
+
+MPI.Finalize()
\ No newline at end of file
diff --git a/src/DDCoNCMPIModule.jl b/src/DDCoNCMPIModule.jl
index 9c7ae5d..ca44579 100644
--- a/src/DDCoNCMPIModule.jl
+++ b/src/DDCoNCMPIModule.jl
@@ -57,17 +57,16 @@
 using LinearAlgebra
 using Statistics: mean
 using ..FENodeToPartitionMapModule: FENodeToPartitionMap
 using ShellStructureTopo
-using MPI
-
 using ..PartitionCoNCModule: CoNCPartitioningInfo, CoNCPartitionData, npartitions
 using ..FinEtoolsDDMethods: set_up_timers, update_timer!, reset_timers!
-
 import ..CGModule: vec_copyto!
 import ..CGModule: vec_aypx!
 import ..CGModule: vec_ypax!
 import ..CGModule: vec_dot
 import Base: deepcopy
+using MPI
+
 
 torank(i) = i - 1
 topartitionnumber(r) = r + 1
@@ -329,8 +328,6 @@ end
 
 mutable struct TwoLevelPreConditioner{DDC<:DDCoNCMPIComm, T, IT, FACTOR}
     ddcomm::DDC
-    napps::Int
-    nskip::Int
     n::IT
     buff_Phi::SparseMatrixCSC{T, IT}
     Kr_ff_factor::FACTOR
@@ -340,8 +337,6 @@ end
 
 function TwoLevelPreConditioner(ddcomm::DDC, Phi) where {DDC<:DDCoNCMPIComm}
     comm = ddcomm.comm
-    napps = 0
-    nskip = 0
     partition = ddcomm.partition
     rank = ddcomm.partition.rank
     n = size(Phi, 1)
@@ -373,29 +368,28 @@ function TwoLevelPreConditioner(ddcomm::DDC, Phi) where {DDC<:DDCoNCMPIComm}
     buff_Phi = P[pel.ldofs_own_only, :]
     buffPp = fill(zero(eltype(Kr_ff_factor)), nr)
     buffKiPp = fill(zero(eltype(Kr_ff_factor)), nr)
-    return TwoLevelPreConditioner(ddcomm, napps, nskip, n, buff_Phi, Kr_ff_factor, buffPp, buffKiPp)
+    return TwoLevelPreConditioner(ddcomm, n, buff_Phi, Kr_ff_factor, buffPp, buffKiPp)
 end
 
 function (pre::TwoLevelPreConditioner)(q::PV, p::PV) where {PV<:PartitionedVector}
     partition = p.ddcomm.partition
     _rhs_update_xt!(p)
     q.buffers.ownv .= 0
-    pre.napps += 1
-    if pre.napps > pre.nskip
-        # Narrow by the transformation
-        ld = partition.entity_list.own.ldofs_own_only
-        pre.buffPp .= pre.buff_Phi' * p.buffers.ownv[ld]
-        # Communicate
-        pre.buffPp .= MPI.Allreduce!(pre.buffPp, MPI.SUM, pre.ddcomm.comm)
-        # Solve the reduced problem
-        pre.buffKiPp .= pre.Kr_ff_factor \ pre.buffPp
-        # Expand by the transformation
-        ld = partition.entity_list.own.ldofs_own_only
-        q.buffers.ownv[ld] .= pre.buff_Phi * pre.buffKiPp
-        pre.napps = 0
-    end
+    # Level 2, narrow by the transformation
+    ld = partition.entity_list.own.ldofs_own_only
+    pre.buffPp .= pre.buff_Phi' * p.buffers.ownv[ld]
+    # Level 2, communicate
+    req = MPI.Iallreduce!(pre.buffPp, MPI.SUM, pre.ddcomm.comm)
     # Level 1
    q.buffers.extv .= partition.Kxt_ff_factor \ p.buffers.extv
+    # Level 2, wait for the communication
+    MPI.Wait(req)
+    # Level 2, solve the reduced problem
+    pre.buffKiPp .= pre.Kr_ff_factor \ pre.buffPp
+    # Level 2, expand by the transformation
+    ld = partition.entity_list.own.ldofs_own_only
+    q.buffers.ownv[ld] .= pre.buff_Phi * pre.buffKiPp
+
     _lhs_update_xt!(q)
     q
 end
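
Note on the preconditioner change above: the blocking `MPI.Allreduce!` of the level-2 (coarse) contribution is replaced by a nonblocking `MPI.Iallreduce!`, so the level-1 local solve runs while the global reduction is in flight and the result is only waited on when it is actually needed. Below is a minimal sketch of that overlap pattern, assuming an `Iallreduce!` with the signature used in this diff (from the pinned MPI.jl fork in examples/Manifest.toml, or the wrapper in examples/mpi_experiments/mpi_experiment_5.jl); the matrix, right-hand sides, and variable names are hypothetical placeholders, not code from this repository.

```julia
# Sketch of overlapping a coarse-level reduction with local work.
# NOTE: MPI.Iallreduce! is assumed to come from the pinned MPI.jl fork
# (released MPI.jl may not export it); everything else is placeholder data.
using MPI, LinearAlgebra

MPI.Init()
comm = MPI.COMM_WORLD

# Level 2: this rank's contribution to the coarse right-hand side.
coarse_rhs = rand(16)
# Start the nonblocking sum reduction across all ranks; do not wait yet.
req = MPI.Iallreduce!(coarse_rhs, MPI.SUM, comm)

# Level 1: local work overlapped with the communication (placeholder solve).
A = lu(rand(100, 100) + 100I)
local_sol = A \ rand(100)

# Level 2: the reduced coarse data is needed only from here on.
MPI.Wait(req)
# ... solve the small coarse problem with coarse_rhs and add its correction to local_sol ...

MPI.Finalize()
```

The ordering is the whole point: the latency of the global reduction is hidden behind the local factorized solve, and only the coarse correction, computed after `MPI.Wait` returns, depends on the communicated data.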